1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*5a655dc6SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.164 1999/03/18 00:35:48 balay Exp bsmith $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 117b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 183b2fbd54SBarry Smith 19537820f0SBarry Smith /* 20537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2157b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2257b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2357b952d6SSatish Balay length of colmap equals the global matrix length. 2457b952d6SSatish Balay */ 255615d1e5SSatish Balay #undef __FUNC__ 265615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2857b952d6SSatish Balay { 2957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3057b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 316dee3f7eSBarry Smith int nbs = B->nbs,i,bs=B->bs; 326dee3f7eSBarry Smith #if defined (USE_CTABLE) 336dee3f7eSBarry Smith int ierr; 346dee3f7eSBarry Smith #endif 3557b952d6SSatish Balay 36d64ed03dSBarry Smith PetscFunctionBegin; 3748e59246SSatish Balay #if defined (USE_CTABLE) 38fa46199cSSatish Balay ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr); 3948e59246SSatish Balay for ( i=0; i<nbs; i++ ){ 4048e59246SSatish Balay ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 4148e59246SSatish Balay } 4248e59246SSatish Balay #else 43758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 4457b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4557b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 46928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4748e59246SSatish Balay #endif 483a40ed3dSBarry Smith PetscFunctionReturn(0); 4957b952d6SSatish Balay } 5057b952d6SSatish Balay 5180c1aa95SSatish Balay #define CHUNKSIZE 10 5280c1aa95SSatish Balay 53f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 5480c1aa95SSatish Balay { \ 5580c1aa95SSatish Balay \ 5680c1aa95SSatish Balay brow = row/bs; \ 5780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 58ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 5980c1aa95SSatish Balay bcol = col/bs; \ 6080c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 61ab26458aSBarry Smith low = 0; high = nrow; \ 62ab26458aSBarry Smith while (high-low > 3) { \ 63ab26458aSBarry Smith t = (low+high)/2; \ 64ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 65ab26458aSBarry Smith else low = t; \ 66ab26458aSBarry Smith } \ 67ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 6880c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 6980c1aa95SSatish Balay if (rp[_i] == bcol) { \ 7080c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 71eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 72eada6651SSatish Balay else *bap = value; \ 73ac7a638eSSatish Balay goto a_noinsert; \ 7480c1aa95SSatish Balay } \ 7580c1aa95SSatish Balay } \ 7689280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 77a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 7880c1aa95SSatish Balay if (nrow >= rmax) { \ 7980c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 8080c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 8180c1aa95SSatish Balay Scalar *new_a; \ 8280c1aa95SSatish Balay \ 83a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 8489280ab3SLois Curfman McInnes \ 8580c1aa95SSatish Balay /* malloc new storage space */ \ 8680c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 8780c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 8880c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 8980c1aa95SSatish Balay new_i = new_j + new_nz; \ 9080c1aa95SSatish Balay \ 9180c1aa95SSatish Balay /* copy over old data into new slots */ \ 9280c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 9380c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 9480c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 9580c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 9680c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 9780c1aa95SSatish Balay len*sizeof(int)); \ 9880c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 9980c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 10080c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 10180c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 10280c1aa95SSatish Balay /* free up old matrix storage */ \ 10380c1aa95SSatish Balay PetscFree(a->a); \ 10480c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 10580c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 10680c1aa95SSatish Balay a->singlemalloc = 1; \ 10780c1aa95SSatish Balay \ 10880c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 11080c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 11180c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 11280c1aa95SSatish Balay a->reallocs++; \ 11380c1aa95SSatish Balay a->nz++; \ 11480c1aa95SSatish Balay } \ 11580c1aa95SSatish Balay N = nrow++ - 1; \ 11680c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 11780c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 11880c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 11980c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 12080c1aa95SSatish Balay } \ 12180c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 12280c1aa95SSatish Balay rp[_i] = bcol; \ 12380c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 124ac7a638eSSatish Balay a_noinsert:; \ 12580c1aa95SSatish Balay ailen[brow] = nrow; \ 12680c1aa95SSatish Balay } 12757b952d6SSatish Balay 128ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 129ac7a638eSSatish Balay { \ 130ac7a638eSSatish Balay \ 131ac7a638eSSatish Balay brow = row/bs; \ 132ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 133ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 134ac7a638eSSatish Balay bcol = col/bs; \ 135ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 136ac7a638eSSatish Balay low = 0; high = nrow; \ 137ac7a638eSSatish Balay while (high-low > 3) { \ 138ac7a638eSSatish Balay t = (low+high)/2; \ 139ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 140ac7a638eSSatish Balay else low = t; \ 141ac7a638eSSatish Balay } \ 142ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 143ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 144ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 145ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 146ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 147ac7a638eSSatish Balay else *bap = value; \ 148ac7a638eSSatish Balay goto b_noinsert; \ 149ac7a638eSSatish Balay } \ 150ac7a638eSSatish Balay } \ 15189280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 152a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 153ac7a638eSSatish Balay if (nrow >= rmax) { \ 154ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15574c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156ac7a638eSSatish Balay Scalar *new_a; \ 157ac7a638eSSatish Balay \ 158a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 15989280ab3SLois Curfman McInnes \ 160ac7a638eSSatish Balay /* malloc new storage space */ \ 16174c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 162ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 164ac7a638eSSatish Balay new_i = new_j + new_nz; \ 165ac7a638eSSatish Balay \ 166ac7a638eSSatish Balay /* copy over old data into new slots */ \ 167ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 16874c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 170ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 171ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 172ac7a638eSSatish Balay len*sizeof(int)); \ 173ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 174ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 175ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 176ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 177ac7a638eSSatish Balay /* free up old matrix storage */ \ 17874c639caSSatish Balay PetscFree(b->a); \ 17974c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 18074c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 18174c639caSSatish Balay b->singlemalloc = 1; \ 182ac7a638eSSatish Balay \ 183ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 184ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 18574c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 18674c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 18774c639caSSatish Balay b->reallocs++; \ 18874c639caSSatish Balay b->nz++; \ 189ac7a638eSSatish Balay } \ 190ac7a638eSSatish Balay N = nrow++ - 1; \ 191ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 192ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 193ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 194ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 195ac7a638eSSatish Balay } \ 196ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 197ac7a638eSSatish Balay rp[_i] = bcol; \ 198ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 199ac7a638eSSatish Balay b_noinsert:; \ 200ac7a638eSSatish Balay bilen[brow] = nrow; \ 201ac7a638eSSatish Balay } 202ac7a638eSSatish Balay 2035615d1e5SSatish Balay #undef __FUNC__ 2045615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 205ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 20657b952d6SSatish Balay { 20757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 20857b952d6SSatish Balay Scalar value; 2094fa0d573SSatish Balay int ierr,i,j,row,col; 2104fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2114fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2124fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 21357b952d6SSatish Balay 214eada6651SSatish Balay /* Some Variables required in the macro */ 21580c1aa95SSatish Balay Mat A = baij->A; 21680c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 217ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 218ac7a638eSSatish Balay Scalar *aa=a->a; 219ac7a638eSSatish Balay 220ac7a638eSSatish Balay Mat B = baij->B; 221ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 222ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 223ac7a638eSSatish Balay Scalar *ba=b->a; 224ac7a638eSSatish Balay 225ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 226ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 227ac7a638eSSatish Balay Scalar *ap,*bap; 22880c1aa95SSatish Balay 229d64ed03dSBarry Smith PetscFunctionBegin; 23057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2315ef9f2a5SBarry Smith if (im[i] < 0) continue; 2323a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 233a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 234639f9d9dSBarry Smith #endif 23557b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 23657b952d6SSatish Balay row = im[i] - rstart_orig; 23757b952d6SSatish Balay for ( j=0; j<n; j++ ) { 23857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 23957b952d6SSatish Balay col = in[j] - cstart_orig; 24057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 241f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 24280c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 24373959e64SBarry Smith } else if (in[j] < 0) continue; 2443a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 245a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 246639f9d9dSBarry Smith #endif 24757b952d6SSatish Balay else { 24857b952d6SSatish Balay if (mat->was_assembled) { 249905e6a2fSBarry Smith if (!baij->colmap) { 250905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 251905e6a2fSBarry Smith } 25248e59246SSatish Balay #if defined (USE_CTABLE) 253fa46199cSSatish Balay ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr); 254fa46199cSSatish Balay col = col - 1 + in[j]%bs; 25548e59246SSatish Balay #else 256905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 25748e59246SSatish Balay #endif 25857b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 25957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 26057b952d6SSatish Balay col = in[j]; 2619bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2629bf004c3SSatish Balay B = baij->B; 2639bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2649bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2659bf004c3SSatish Balay ba=b->a; 26657b952d6SSatish Balay } 267d64ed03dSBarry Smith } else col = in[j]; 26857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 269ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 270ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 27157b952d6SSatish Balay } 27257b952d6SSatish Balay } 273d64ed03dSBarry Smith } else { 27490f02eecSBarry Smith if ( !baij->donotstash) { 275ff2fd236SBarry Smith if (roworiented) { 2768798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 277ff2fd236SBarry Smith } else { 2788798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 27957b952d6SSatish Balay } 28057b952d6SSatish Balay } 28157b952d6SSatish Balay } 28290f02eecSBarry Smith } 2833a40ed3dSBarry Smith PetscFunctionReturn(0); 28457b952d6SSatish Balay } 28557b952d6SSatish Balay 286ab26458aSBarry Smith #undef __FUNC__ 287ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 288ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 289ab26458aSBarry Smith { 290ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 29130793edcSSatish Balay Scalar *value,*barray=baij->barray; 2927ef1d9bdSSatish Balay int ierr,i,j,ii,jj,row,col; 293ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 294ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 295ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 296ab26458aSBarry Smith 29730793edcSSatish Balay if(!barray) { 29847513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 29930793edcSSatish Balay } 30030793edcSSatish Balay 301ab26458aSBarry Smith if (roworiented) { 302ab26458aSBarry Smith stepval = (n-1)*bs; 303ab26458aSBarry Smith } else { 304ab26458aSBarry Smith stepval = (m-1)*bs; 305ab26458aSBarry Smith } 306ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 3075ef9f2a5SBarry Smith if (im[i] < 0) continue; 3083a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3095ef9f2a5SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 310ab26458aSBarry Smith #endif 311ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 312ab26458aSBarry Smith row = im[i] - rstart; 313ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 31415b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 31515b57d14SSatish Balay if ((roworiented) && (n == 1)) { 31615b57d14SSatish Balay barray = v + i*bs2; 31715b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 31815b57d14SSatish Balay barray = v + j*bs2; 31915b57d14SSatish Balay } else { /* Here a copy is required */ 320ab26458aSBarry Smith if (roworiented) { 321ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 322ab26458aSBarry Smith } else { 323ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 324abef11f7SSatish Balay } 32547513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 32647513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 32730793edcSSatish Balay *barray++ = *value++; 32847513183SBarry Smith } 32947513183SBarry Smith } 33030793edcSSatish Balay barray -=bs2; 33115b57d14SSatish Balay } 332abef11f7SSatish Balay 333abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 334abef11f7SSatish Balay col = in[j] - cstart; 33530793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 336ab26458aSBarry Smith } 3375ef9f2a5SBarry Smith else if (in[j] < 0) continue; 3383a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3395ef9f2a5SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 340ab26458aSBarry Smith #endif 341ab26458aSBarry Smith else { 342ab26458aSBarry Smith if (mat->was_assembled) { 343ab26458aSBarry Smith if (!baij->colmap) { 344ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 345ab26458aSBarry Smith } 346a5eb4965SSatish Balay 3473a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 34848e59246SSatish Balay #if defined (USE_CTABLE) 349fa46199cSSatish Balay { int data; 350fa46199cSSatish Balay ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr); 351fa46199cSSatish Balay if((data - 1) % bs) 35248e59246SSatish Balay {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 353fa46199cSSatish Balay } 35448e59246SSatish Balay #else 355a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 356a5eb4965SSatish Balay #endif 35748e59246SSatish Balay #endif 35848e59246SSatish Balay #if defined (USE_CTABLE) 359fa46199cSSatish Balay ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr); 360fa46199cSSatish Balay col = (col - 1)/bs; 36148e59246SSatish Balay #else 362a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 36348e59246SSatish Balay #endif 364ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 365ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 366ab26458aSBarry Smith col = in[j]; 367ab26458aSBarry Smith } 368ab26458aSBarry Smith } 369ab26458aSBarry Smith else col = in[j]; 37030793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 371ab26458aSBarry Smith } 372ab26458aSBarry Smith } 373d64ed03dSBarry Smith } else { 374ab26458aSBarry Smith if (!baij->donotstash) { 375ff2fd236SBarry Smith if (roworiented) { 3768798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 377ff2fd236SBarry Smith } else { 3788798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 379ff2fd236SBarry Smith } 380abef11f7SSatish Balay } 381ab26458aSBarry Smith } 382ab26458aSBarry Smith } 3833a40ed3dSBarry Smith PetscFunctionReturn(0); 384ab26458aSBarry Smith } 3850bdbc534SSatish Balay #define HASH_KEY 0.6180339887 386c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 387c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 388c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 3895615d1e5SSatish Balay #undef __FUNC__ 3900bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 3910bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 3920bdbc534SSatish Balay { 3930bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3940bdbc534SSatish Balay int ierr,i,j,row,col; 3950bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 396c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 397c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 398c2760754SSatish Balay double tmp; 399b9e4cc15SSatish Balay Scalar ** HD = baij->hd,value; 4004a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4014a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4024a15367fSSatish Balay #endif 4030bdbc534SSatish Balay 4040bdbc534SSatish Balay PetscFunctionBegin; 4050bdbc534SSatish Balay 4060bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4070bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4080bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4090bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4100bdbc534SSatish Balay #endif 4110bdbc534SSatish Balay row = im[i]; 412c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4130bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4140bdbc534SSatish Balay col = in[j]; 4150bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4160bdbc534SSatish Balay /* Look up into the Hash Table */ 417c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 418c2760754SSatish Balay h1 = HASH(size,key,tmp); 4190bdbc534SSatish Balay 420c2760754SSatish Balay 421c2760754SSatish Balay idx = h1; 422187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 423187ce0cbSSatish Balay insert_ct++; 424187ce0cbSSatish Balay total_ct++; 425187ce0cbSSatish Balay if (HT[idx] != key) { 426187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 427187ce0cbSSatish Balay if (idx == size) { 428187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 429187ce0cbSSatish Balay if (idx == h1) { 430187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 431187ce0cbSSatish Balay } 432187ce0cbSSatish Balay } 433187ce0cbSSatish Balay } 434187ce0cbSSatish Balay #else 435c2760754SSatish Balay if (HT[idx] != key) { 436c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 437c2760754SSatish Balay if (idx == size) { 438c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 439c2760754SSatish Balay if (idx == h1) { 440c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 441c2760754SSatish Balay } 442c2760754SSatish Balay } 443c2760754SSatish Balay } 444187ce0cbSSatish Balay #endif 445c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 446c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 447c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4480bdbc534SSatish Balay } 4490bdbc534SSatish Balay } else { 4500bdbc534SSatish Balay if (!baij->donotstash) { 451ff2fd236SBarry Smith if (roworiented) { 4528798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453ff2fd236SBarry Smith } else { 4548798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4550bdbc534SSatish Balay } 4560bdbc534SSatish Balay } 4570bdbc534SSatish Balay } 4580bdbc534SSatish Balay } 459187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 460187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 461187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 462187ce0cbSSatish Balay #endif 4630bdbc534SSatish Balay PetscFunctionReturn(0); 4640bdbc534SSatish Balay } 4650bdbc534SSatish Balay 4660bdbc534SSatish Balay #undef __FUNC__ 4670bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4680bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4690bdbc534SSatish Balay { 4700bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4718798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 4720bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 473b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 474c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 475c2760754SSatish Balay double tmp; 476187ce0cbSSatish Balay Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 4774a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4784a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4794a15367fSSatish Balay #endif 4800bdbc534SSatish Balay 481d0a41580SSatish Balay PetscFunctionBegin; 482d0a41580SSatish Balay 4830bdbc534SSatish Balay if (roworiented) { 4840bdbc534SSatish Balay stepval = (n-1)*bs; 4850bdbc534SSatish Balay } else { 4860bdbc534SSatish Balay stepval = (m-1)*bs; 4870bdbc534SSatish Balay } 4880bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4890bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4900bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4910bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4920bdbc534SSatish Balay #endif 4930bdbc534SSatish Balay row = im[i]; 494187ce0cbSSatish Balay v_t = v + i*bs2; 495c2760754SSatish Balay if (row >= rstart && row < rend) { 4960bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4970bdbc534SSatish Balay col = in[j]; 4980bdbc534SSatish Balay 4990bdbc534SSatish Balay /* Look up into the Hash Table */ 500c2760754SSatish Balay key = row*Nbs+col+1; 501c2760754SSatish Balay h1 = HASH(size,key,tmp); 5020bdbc534SSatish Balay 503c2760754SSatish Balay idx = h1; 504187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 505187ce0cbSSatish Balay total_ct++; 506187ce0cbSSatish Balay insert_ct++; 507187ce0cbSSatish Balay if (HT[idx] != key) { 508187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 509187ce0cbSSatish Balay if (idx == size) { 510187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 511187ce0cbSSatish Balay if (idx == h1) { 512187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 513187ce0cbSSatish Balay } 514187ce0cbSSatish Balay } 515187ce0cbSSatish Balay } 516187ce0cbSSatish Balay #else 517c2760754SSatish Balay if (HT[idx] != key) { 518c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 519c2760754SSatish Balay if (idx == size) { 520c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 521c2760754SSatish Balay if (idx == h1) { 522c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 523c2760754SSatish Balay } 524c2760754SSatish Balay } 525c2760754SSatish Balay } 526187ce0cbSSatish Balay #endif 527c2760754SSatish Balay baij_a = HD[idx]; 5280bdbc534SSatish Balay if (roworiented) { 529c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 530187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 531187ce0cbSSatish Balay value = v_t; 532187ce0cbSSatish Balay v_t += bs; 533fef45726SSatish Balay if (addv == ADD_VALUES) { 534c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 535c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 536fef45726SSatish Balay baij_a[jj] += *value++; 537b4cc0f5aSSatish Balay } 538b4cc0f5aSSatish Balay } 539fef45726SSatish Balay } else { 540c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 541c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 542fef45726SSatish Balay baij_a[jj] = *value++; 543fef45726SSatish Balay } 544fef45726SSatish Balay } 545fef45726SSatish Balay } 5460bdbc534SSatish Balay } else { 5470bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 548fef45726SSatish Balay if (addv == ADD_VALUES) { 549b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 5500bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 551fef45726SSatish Balay baij_a[jj] += *value++; 552fef45726SSatish Balay } 553fef45726SSatish Balay } 554fef45726SSatish Balay } else { 555fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 556fef45726SSatish Balay for ( jj=0; jj<bs; jj++ ) { 557fef45726SSatish Balay baij_a[jj] = *value++; 558fef45726SSatish Balay } 559b4cc0f5aSSatish Balay } 5600bdbc534SSatish Balay } 5610bdbc534SSatish Balay } 5620bdbc534SSatish Balay } 5630bdbc534SSatish Balay } else { 5640bdbc534SSatish Balay if (!baij->donotstash) { 5650bdbc534SSatish Balay if (roworiented) { 5668798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 5670bdbc534SSatish Balay } else { 5688798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 5690bdbc534SSatish Balay } 5700bdbc534SSatish Balay } 5710bdbc534SSatish Balay } 5720bdbc534SSatish Balay } 573187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 574187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 575187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 576187ce0cbSSatish Balay #endif 5770bdbc534SSatish Balay PetscFunctionReturn(0); 5780bdbc534SSatish Balay } 579133cdb44SSatish Balay 5800bdbc534SSatish Balay #undef __FUNC__ 5815615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 582ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 583d6de1c52SSatish Balay { 584d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 585d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 58648e59246SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 587d6de1c52SSatish Balay 588133cdb44SSatish Balay PetscFunctionBegin; 589d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 590a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 591a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 592d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 593d6de1c52SSatish Balay row = idxm[i] - bsrstart; 594d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 595a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 596a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 597d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 598d6de1c52SSatish Balay col = idxn[j] - bscstart; 59998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 600d64ed03dSBarry Smith } else { 601905e6a2fSBarry Smith if (!baij->colmap) { 602905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 603905e6a2fSBarry Smith } 60448e59246SSatish Balay #if defined (USE_CTABLE) 605fa46199cSSatish Balay ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr); 606fa46199cSSatish Balay data --; 60748e59246SSatish Balay #else 60848e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 60948e59246SSatish Balay #endif 61048e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611d9d09a02SSatish Balay else { 61248e59246SSatish Balay col = data + idxn[j]%bs; 61398dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 614d6de1c52SSatish Balay } 615d6de1c52SSatish Balay } 616d6de1c52SSatish Balay } 617d64ed03dSBarry Smith } else { 618a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 619d6de1c52SSatish Balay } 620d6de1c52SSatish Balay } 6213a40ed3dSBarry Smith PetscFunctionReturn(0); 622d6de1c52SSatish Balay } 623d6de1c52SSatish Balay 6245615d1e5SSatish Balay #undef __FUNC__ 6255615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 626ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 627d6de1c52SSatish Balay { 628d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 629d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 630acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 631d6de1c52SSatish Balay double sum = 0.0; 632d6de1c52SSatish Balay Scalar *v; 633d6de1c52SSatish Balay 634d64ed03dSBarry Smith PetscFunctionBegin; 635d6de1c52SSatish Balay if (baij->size == 1) { 636d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 637d6de1c52SSatish Balay } else { 638d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 639d6de1c52SSatish Balay v = amat->a; 640d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 6413a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 642e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 643d6de1c52SSatish Balay #else 644d6de1c52SSatish Balay sum += (*v)*(*v); v++; 645d6de1c52SSatish Balay #endif 646d6de1c52SSatish Balay } 647d6de1c52SSatish Balay v = bmat->a; 648d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 6493a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 650e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 651d6de1c52SSatish Balay #else 652d6de1c52SSatish Balay sum += (*v)*(*v); v++; 653d6de1c52SSatish Balay #endif 654d6de1c52SSatish Balay } 655ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 656d6de1c52SSatish Balay *norm = sqrt(*norm); 657d64ed03dSBarry Smith } else { 658e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 659d6de1c52SSatish Balay } 660d64ed03dSBarry Smith } 6613a40ed3dSBarry Smith PetscFunctionReturn(0); 662d6de1c52SSatish Balay } 66357b952d6SSatish Balay 664bd7f49f5SSatish Balay 665fef45726SSatish Balay /* 666fef45726SSatish Balay Creates the hash table, and sets the table 667fef45726SSatish Balay This table is created only once. 668fef45726SSatish Balay If new entried need to be added to the matrix 669fef45726SSatish Balay then the hash table has to be destroyed and 670fef45726SSatish Balay recreated. 671fef45726SSatish Balay */ 672fef45726SSatish Balay #undef __FUNC__ 673fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 674d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 675596b8d2eSBarry Smith { 676596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 677596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 678596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 6790bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 6804a15367fSSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart; 681187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 682fef45726SSatish Balay int *HT,key; 6830bdbc534SSatish Balay Scalar **HD; 684c2760754SSatish Balay double tmp; 6854a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 6864a15367fSSatish Balay int ct=0,max=0; 6874a15367fSSatish Balay #endif 688fef45726SSatish Balay 689d64ed03dSBarry Smith PetscFunctionBegin; 6900bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 6910bdbc534SSatish Balay size = baij->ht_size; 692fef45726SSatish Balay 6930bdbc534SSatish Balay if (baij->ht) { 6940bdbc534SSatish Balay PetscFunctionReturn(0); 695596b8d2eSBarry Smith } 6960bdbc534SSatish Balay 697fef45726SSatish Balay /* Allocate Memory for Hash Table */ 698b9e4cc15SSatish Balay baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 699b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 700b9e4cc15SSatish Balay HD = baij->hd; 701a07cd24cSSatish Balay HT = baij->ht; 702b9e4cc15SSatish Balay 703b9e4cc15SSatish Balay 704c2760754SSatish Balay PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 7050bdbc534SSatish Balay 706596b8d2eSBarry Smith 707596b8d2eSBarry Smith /* Loop Over A */ 7080bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 709596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 7100bdbc534SSatish Balay row = i+rstart; 7110bdbc534SSatish Balay col = aj[j]+cstart; 712596b8d2eSBarry Smith 713187ce0cbSSatish Balay key = row*Nbs + col + 1; 714c2760754SSatish Balay h1 = HASH(size,key,tmp); 7150bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7160bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7170bdbc534SSatish Balay HT[(h1+k)%size] = key; 7180bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 719596b8d2eSBarry Smith break; 720187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 721187ce0cbSSatish Balay } else { 722187ce0cbSSatish Balay ct++; 723187ce0cbSSatish Balay #endif 724596b8d2eSBarry Smith } 725187ce0cbSSatish Balay } 726187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 727187ce0cbSSatish Balay if (k> max) max = k; 728187ce0cbSSatish Balay #endif 729596b8d2eSBarry Smith } 730596b8d2eSBarry Smith } 731596b8d2eSBarry Smith /* Loop Over B */ 7320bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 733596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 7340bdbc534SSatish Balay row = i+rstart; 7350bdbc534SSatish Balay col = garray[bj[j]]; 736187ce0cbSSatish Balay key = row*Nbs + col + 1; 737c2760754SSatish Balay h1 = HASH(size,key,tmp); 7380bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7390bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7400bdbc534SSatish Balay HT[(h1+k)%size] = key; 7410bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 742596b8d2eSBarry Smith break; 743187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 744187ce0cbSSatish Balay } else { 745187ce0cbSSatish Balay ct++; 746187ce0cbSSatish Balay #endif 747596b8d2eSBarry Smith } 748187ce0cbSSatish Balay } 749187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 750187ce0cbSSatish Balay if (k> max) max = k; 751187ce0cbSSatish Balay #endif 752596b8d2eSBarry Smith } 753596b8d2eSBarry Smith } 754596b8d2eSBarry Smith 755596b8d2eSBarry Smith /* Print Summary */ 756187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 757c2760754SSatish Balay for ( i=0,j=0; i<size; i++) 758596b8d2eSBarry Smith if (HT[i]) {j++;} 759187ce0cbSSatish Balay PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 760187ce0cbSSatish Balay (j== 0)? 0.0:((double)(ct+j))/j,max); 761187ce0cbSSatish Balay #endif 7623a40ed3dSBarry Smith PetscFunctionReturn(0); 763596b8d2eSBarry Smith } 76457b952d6SSatish Balay 765bbb85fb3SSatish Balay #undef __FUNC__ 766bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 767bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 768bbb85fb3SSatish Balay { 769bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 770ff2fd236SBarry Smith int ierr,nstash,reallocs; 771bbb85fb3SSatish Balay InsertMode addv; 772bbb85fb3SSatish Balay 773bbb85fb3SSatish Balay PetscFunctionBegin; 774bbb85fb3SSatish Balay if (baij->donotstash) { 775bbb85fb3SSatish Balay PetscFunctionReturn(0); 776bbb85fb3SSatish Balay } 777bbb85fb3SSatish Balay 778bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 779bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 780bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 781bbb85fb3SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 782bbb85fb3SSatish Balay } 783bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 784bbb85fb3SSatish Balay 7858798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs); CHKERRQ(ierr); 7868798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners); CHKERRQ(ierr); 7878798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 788*5a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 7898798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 790*5a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 791bbb85fb3SSatish Balay PetscFunctionReturn(0); 792bbb85fb3SSatish Balay } 793bbb85fb3SSatish Balay 794bbb85fb3SSatish Balay #undef __FUNC__ 795bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 796bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 797bbb85fb3SSatish Balay { 798bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 799a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 800a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 801a2d1c673SSatish Balay int *row,*col,other_disassembled,r1,r2,r3; 802bbb85fb3SSatish Balay Scalar *val; 803bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 804bbb85fb3SSatish Balay 805bbb85fb3SSatish Balay PetscFunctionBegin; 806bbb85fb3SSatish Balay if (!baij->donotstash) { 807a2d1c673SSatish Balay while (1) { 8088798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 809a2d1c673SSatish Balay if (!flg) break; 810a2d1c673SSatish Balay 811bbb85fb3SSatish Balay for ( i=0; i<n; ) { 812bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 813bbb85fb3SSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 814bbb85fb3SSatish Balay if (j < n) ncols = j-i; 815bbb85fb3SSatish Balay else ncols = n-i; 816bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 817bbb85fb3SSatish Balay ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 818bbb85fb3SSatish Balay i = j; 819bbb85fb3SSatish Balay } 820bbb85fb3SSatish Balay } 8218798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr); 822a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 823a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 824a2d1c673SSatish Balay restore the original flags */ 825a2d1c673SSatish Balay r1 = baij->roworiented; 826a2d1c673SSatish Balay r2 = a->roworiented; 827a2d1c673SSatish Balay r3 = b->roworiented; 828a2d1c673SSatish Balay baij->roworiented = 0; 829a2d1c673SSatish Balay a->roworiented = 0; 830a2d1c673SSatish Balay b->roworiented = 0; 831a2d1c673SSatish Balay while (1) { 8328798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 833a2d1c673SSatish Balay if (!flg) break; 834a2d1c673SSatish Balay 835a2d1c673SSatish Balay for ( i=0; i<n; ) { 836a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 837a2d1c673SSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 838a2d1c673SSatish Balay if (j < n) ncols = j-i; 839a2d1c673SSatish Balay else ncols = n-i; 840a2d1c673SSatish Balay ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr); 841a2d1c673SSatish Balay i = j; 842a2d1c673SSatish Balay } 843a2d1c673SSatish Balay } 8448798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash); CHKERRQ(ierr); 845a2d1c673SSatish Balay baij->roworiented = r1; 846a2d1c673SSatish Balay a->roworiented = r2; 847a2d1c673SSatish Balay b->roworiented = r3; 848bbb85fb3SSatish Balay } 849bbb85fb3SSatish Balay 850bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 851bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 852bbb85fb3SSatish Balay 853bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 854bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 855bbb85fb3SSatish Balay /* 856bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 857bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 858bbb85fb3SSatish Balay */ 859bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 860bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 861bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 862bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 863bbb85fb3SSatish Balay } 864bbb85fb3SSatish Balay } 865bbb85fb3SSatish Balay 866bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 867bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 868bbb85fb3SSatish Balay } 869bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 870bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 871bbb85fb3SSatish Balay 872bbb85fb3SSatish Balay #if defined(USE_PETSC_BOPT_g) 873bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 874bbb85fb3SSatish Balay PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 875bbb85fb3SSatish Balay ((double)baij->ht_total_ct)/baij->ht_insert_ct); 876bbb85fb3SSatish Balay baij->ht_total_ct = 0; 877bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 878bbb85fb3SSatish Balay } 879bbb85fb3SSatish Balay #endif 880bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 881bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 882bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 883bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 884bbb85fb3SSatish Balay } 885bbb85fb3SSatish Balay 886bbb85fb3SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 887bbb85fb3SSatish Balay PetscFunctionReturn(0); 888bbb85fb3SSatish Balay } 88957b952d6SSatish Balay 8905615d1e5SSatish Balay #undef __FUNC__ 8915615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 89257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 89357b952d6SSatish Balay { 89457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 89557b952d6SSatish Balay int ierr; 89657b952d6SSatish Balay 897d64ed03dSBarry Smith PetscFunctionBegin; 89857b952d6SSatish Balay if (baij->size == 1) { 89957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 900a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 9013a40ed3dSBarry Smith PetscFunctionReturn(0); 90257b952d6SSatish Balay } 90357b952d6SSatish Balay 9045615d1e5SSatish Balay #undef __FUNC__ 9057b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 9067b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 90757b952d6SSatish Balay { 90857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 90977ed5343SBarry Smith int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 91057b952d6SSatish Balay FILE *fd; 91157b952d6SSatish Balay ViewerType vtype; 91257b952d6SSatish Balay 913d64ed03dSBarry Smith PetscFunctionBegin; 91457b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 9153f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 916d41123aaSBarry Smith ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr); 917639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 9184e220ebcSLois Curfman McInnes MatInfo info; 91957b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 92057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 921d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 92257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 92357b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 9244e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 9254e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 9264e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 9274e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 9284e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 9294e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 93057b952d6SSatish Balay fflush(fd); 93157b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 93257b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 9333a40ed3dSBarry Smith PetscFunctionReturn(0); 934d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 935bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 9363a40ed3dSBarry Smith PetscFunctionReturn(0); 93757b952d6SSatish Balay } 93857b952d6SSatish Balay } 93957b952d6SSatish Balay 9403f1db9ecSBarry Smith if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 94157b952d6SSatish Balay Draw draw; 94257b952d6SSatish Balay PetscTruth isnull; 94377ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 9443a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 94557b952d6SSatish Balay } 94657b952d6SSatish Balay 94757b952d6SSatish Balay if (size == 1) { 94857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 949d64ed03dSBarry Smith } else { 95057b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 95157b952d6SSatish Balay Mat A; 95257b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 95340011551SBarry Smith int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 95457b952d6SSatish Balay int mbs = baij->mbs; 95557b952d6SSatish Balay Scalar *a; 95657b952d6SSatish Balay 95757b952d6SSatish Balay if (!rank) { 95855843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 959d64ed03dSBarry Smith } else { 96055843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 96157b952d6SSatish Balay } 96257b952d6SSatish Balay PLogObjectParent(mat,A); 96357b952d6SSatish Balay 96457b952d6SSatish Balay /* copy over the A part */ 96557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 96657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 96757b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 96857b952d6SSatish Balay 96957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 97057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 97157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 97257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 97357b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 97457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 975cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 976cee3aa6bSSatish Balay col++; a += bs; 97757b952d6SSatish Balay } 97857b952d6SSatish Balay } 97957b952d6SSatish Balay } 98057b952d6SSatish Balay /* copy over the B part */ 98157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 98257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 98357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 98457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 98557b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 98657b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 98757b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 98857b952d6SSatish Balay for (k=0; k<bs; k++ ) { 989cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 990cee3aa6bSSatish Balay col++; a += bs; 99157b952d6SSatish Balay } 99257b952d6SSatish Balay } 99357b952d6SSatish Balay } 99457b952d6SSatish Balay PetscFree(rvals); 9956d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 9966d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 99755843e3eSBarry Smith /* 99855843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 99955843e3eSBarry Smith synchronized across all processors that share the Draw object 100055843e3eSBarry Smith */ 10013f1db9ecSBarry Smith if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 100257b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 100357b952d6SSatish Balay } 100457b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 100557b952d6SSatish Balay } 10063a40ed3dSBarry Smith PetscFunctionReturn(0); 100757b952d6SSatish Balay } 100857b952d6SSatish Balay 100957b952d6SSatish Balay 101057b952d6SSatish Balay 10115615d1e5SSatish Balay #undef __FUNC__ 10125615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 1013e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 101457b952d6SSatish Balay { 101557b952d6SSatish Balay int ierr; 101657b952d6SSatish Balay ViewerType vtype; 101757b952d6SSatish Balay 1018d64ed03dSBarry Smith PetscFunctionBegin; 101957b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 10203f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 10217b2a1423SBarry Smith PetscTypeCompare(vtype,SOCKET_VIEWER)) { 10227b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 10233f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 10243a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 10255cd90555SBarry Smith } else { 10265cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 102757b952d6SSatish Balay } 10283a40ed3dSBarry Smith PetscFunctionReturn(0); 102957b952d6SSatish Balay } 103057b952d6SSatish Balay 10315615d1e5SSatish Balay #undef __FUNC__ 10325615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1033e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 103479bdfe76SSatish Balay { 103579bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 103679bdfe76SSatish Balay int ierr; 103779bdfe76SSatish Balay 1038d64ed03dSBarry Smith PetscFunctionBegin; 103998dd23e9SBarry Smith if (--mat->refct > 0) PetscFunctionReturn(0); 104098dd23e9SBarry Smith 104198dd23e9SBarry Smith if (mat->mapping) { 104298dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 104398dd23e9SBarry Smith } 104498dd23e9SBarry Smith if (mat->bmapping) { 104598dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 104698dd23e9SBarry Smith } 104761b13de0SBarry Smith if (mat->rmap) { 104861b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 104961b13de0SBarry Smith } 105061b13de0SBarry Smith if (mat->cmap) { 105161b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 105261b13de0SBarry Smith } 10533a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 1054e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 105579bdfe76SSatish Balay #endif 105679bdfe76SSatish Balay 10578798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr); 10588798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash); CHKERRQ(ierr); 10598798bf22SSatish Balay 106079bdfe76SSatish Balay PetscFree(baij->rowners); 106179bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 106279bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 106348e59246SSatish Balay #if defined (USE_CTABLE) 106448e59246SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 106548e59246SSatish Balay #else 106679bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 106748e59246SSatish Balay #endif 106879bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 106979bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 107079bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 107179bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 107230793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 1073b9e4cc15SSatish Balay if (baij->hd) PetscFree(baij->hd); 107479bdfe76SSatish Balay PetscFree(baij); 107579bdfe76SSatish Balay PLogObjectDestroy(mat); 107679bdfe76SSatish Balay PetscHeaderDestroy(mat); 10773a40ed3dSBarry Smith PetscFunctionReturn(0); 107879bdfe76SSatish Balay } 107979bdfe76SSatish Balay 10805615d1e5SSatish Balay #undef __FUNC__ 10815615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1082ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1083cee3aa6bSSatish Balay { 1084cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 108547b4a8eaSLois Curfman McInnes int ierr, nt; 1086cee3aa6bSSatish Balay 1087d64ed03dSBarry Smith PetscFunctionBegin; 1088e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 108947b4a8eaSLois Curfman McInnes if (nt != a->n) { 1090a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 109147b4a8eaSLois Curfman McInnes } 1092e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 109347b4a8eaSLois Curfman McInnes if (nt != a->m) { 1094a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 109547b4a8eaSLois Curfman McInnes } 109643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1097f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 109843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1099f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 110043a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 11013a40ed3dSBarry Smith PetscFunctionReturn(0); 1102cee3aa6bSSatish Balay } 1103cee3aa6bSSatish Balay 11045615d1e5SSatish Balay #undef __FUNC__ 11055615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1106ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1107cee3aa6bSSatish Balay { 1108cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1109cee3aa6bSSatish Balay int ierr; 1110d64ed03dSBarry Smith 1111d64ed03dSBarry Smith PetscFunctionBegin; 111243a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1113f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 111443a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1115f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 11163a40ed3dSBarry Smith PetscFunctionReturn(0); 1117cee3aa6bSSatish Balay } 1118cee3aa6bSSatish Balay 11195615d1e5SSatish Balay #undef __FUNC__ 11205615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1121ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1122cee3aa6bSSatish Balay { 1123cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1124cee3aa6bSSatish Balay int ierr; 1125cee3aa6bSSatish Balay 1126d64ed03dSBarry Smith PetscFunctionBegin; 1127cee3aa6bSSatish Balay /* do nondiagonal part */ 1128f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1129cee3aa6bSSatish Balay /* send it on its way */ 1130537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1131cee3aa6bSSatish Balay /* do local part */ 1132f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1133cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1134cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1135cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1136639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 11373a40ed3dSBarry Smith PetscFunctionReturn(0); 1138cee3aa6bSSatish Balay } 1139cee3aa6bSSatish Balay 11405615d1e5SSatish Balay #undef __FUNC__ 11415615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1142ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1143cee3aa6bSSatish Balay { 1144cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1145cee3aa6bSSatish Balay int ierr; 1146cee3aa6bSSatish Balay 1147d64ed03dSBarry Smith PetscFunctionBegin; 1148cee3aa6bSSatish Balay /* do nondiagonal part */ 1149f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1150cee3aa6bSSatish Balay /* send it on its way */ 1151537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1152cee3aa6bSSatish Balay /* do local part */ 1153f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1154cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1155cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1156cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1157537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 11583a40ed3dSBarry Smith PetscFunctionReturn(0); 1159cee3aa6bSSatish Balay } 1160cee3aa6bSSatish Balay 1161cee3aa6bSSatish Balay /* 1162cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1163cee3aa6bSSatish Balay diagonal block 1164cee3aa6bSSatish Balay */ 11655615d1e5SSatish Balay #undef __FUNC__ 11665615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1167ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1168cee3aa6bSSatish Balay { 1169cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 11703a40ed3dSBarry Smith int ierr; 1171d64ed03dSBarry Smith 1172d64ed03dSBarry Smith PetscFunctionBegin; 1173a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 11743a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 11753a40ed3dSBarry Smith PetscFunctionReturn(0); 1176cee3aa6bSSatish Balay } 1177cee3aa6bSSatish Balay 11785615d1e5SSatish Balay #undef __FUNC__ 11795615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1180ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1181cee3aa6bSSatish Balay { 1182cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1183cee3aa6bSSatish Balay int ierr; 1184d64ed03dSBarry Smith 1185d64ed03dSBarry Smith PetscFunctionBegin; 1186cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1187cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 11883a40ed3dSBarry Smith PetscFunctionReturn(0); 1189cee3aa6bSSatish Balay } 1190026e39d0SSatish Balay 11915615d1e5SSatish Balay #undef __FUNC__ 11925615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1193ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 119457b952d6SSatish Balay { 119557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1196d64ed03dSBarry Smith 1197d64ed03dSBarry Smith PetscFunctionBegin; 1198bd7f49f5SSatish Balay if (m) *m = mat->M; 1199bd7f49f5SSatish Balay if (n) *n = mat->N; 12003a40ed3dSBarry Smith PetscFunctionReturn(0); 120157b952d6SSatish Balay } 120257b952d6SSatish Balay 12035615d1e5SSatish Balay #undef __FUNC__ 12045615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1205ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 120657b952d6SSatish Balay { 120757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1208d64ed03dSBarry Smith 1209d64ed03dSBarry Smith PetscFunctionBegin; 1210f830108cSBarry Smith *m = mat->m; *n = mat->n; 12113a40ed3dSBarry Smith PetscFunctionReturn(0); 121257b952d6SSatish Balay } 121357b952d6SSatish Balay 12145615d1e5SSatish Balay #undef __FUNC__ 12155615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1216ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 121757b952d6SSatish Balay { 121857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1219d64ed03dSBarry Smith 1220d64ed03dSBarry Smith PetscFunctionBegin; 122157b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 12223a40ed3dSBarry Smith PetscFunctionReturn(0); 122357b952d6SSatish Balay } 122457b952d6SSatish Balay 12255615d1e5SSatish Balay #undef __FUNC__ 12265615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1227acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1228acdf5bf4SSatish Balay { 1229acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1230acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1231acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1232d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1233d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1234acdf5bf4SSatish Balay 1235d64ed03dSBarry Smith PetscFunctionBegin; 1236a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1237acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1238acdf5bf4SSatish Balay 1239acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1240acdf5bf4SSatish Balay /* 1241acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1242acdf5bf4SSatish Balay */ 1243acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1244bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1245bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1246acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1247acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1248acdf5bf4SSatish Balay } 1249acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1250acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1251acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1252acdf5bf4SSatish Balay } 1253acdf5bf4SSatish Balay 1254a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1255d9d09a02SSatish Balay lrow = row - brstart; 1256acdf5bf4SSatish Balay 1257acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1258acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1259acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1260f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1261f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1262acdf5bf4SSatish Balay nztot = nzA + nzB; 1263acdf5bf4SSatish Balay 1264acdf5bf4SSatish Balay cmap = mat->garray; 1265acdf5bf4SSatish Balay if (v || idx) { 1266acdf5bf4SSatish Balay if (nztot) { 1267acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1268acdf5bf4SSatish Balay int imark = -1; 1269acdf5bf4SSatish Balay if (v) { 1270acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1271acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1272d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1273acdf5bf4SSatish Balay else break; 1274acdf5bf4SSatish Balay } 1275acdf5bf4SSatish Balay imark = i; 1276acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1277acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1278acdf5bf4SSatish Balay } 1279acdf5bf4SSatish Balay if (idx) { 1280acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1281acdf5bf4SSatish Balay if (imark > -1) { 1282acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1283bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1284acdf5bf4SSatish Balay } 1285acdf5bf4SSatish Balay } else { 1286acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1287d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1288d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1289acdf5bf4SSatish Balay else break; 1290acdf5bf4SSatish Balay } 1291acdf5bf4SSatish Balay imark = i; 1292acdf5bf4SSatish Balay } 1293d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1294d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1295acdf5bf4SSatish Balay } 1296d64ed03dSBarry Smith } else { 1297d212a18eSSatish Balay if (idx) *idx = 0; 1298d212a18eSSatish Balay if (v) *v = 0; 1299d212a18eSSatish Balay } 1300acdf5bf4SSatish Balay } 1301acdf5bf4SSatish Balay *nz = nztot; 1302f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1303f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 13043a40ed3dSBarry Smith PetscFunctionReturn(0); 1305acdf5bf4SSatish Balay } 1306acdf5bf4SSatish Balay 13075615d1e5SSatish Balay #undef __FUNC__ 13085615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1309acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1310acdf5bf4SSatish Balay { 1311acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1312d64ed03dSBarry Smith 1313d64ed03dSBarry Smith PetscFunctionBegin; 1314acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1315a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1316acdf5bf4SSatish Balay } 1317acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 13183a40ed3dSBarry Smith PetscFunctionReturn(0); 1319acdf5bf4SSatish Balay } 1320acdf5bf4SSatish Balay 13215615d1e5SSatish Balay #undef __FUNC__ 13225615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1323ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 13245a838052SSatish Balay { 13255a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1326d64ed03dSBarry Smith 1327d64ed03dSBarry Smith PetscFunctionBegin; 13285a838052SSatish Balay *bs = baij->bs; 13293a40ed3dSBarry Smith PetscFunctionReturn(0); 13305a838052SSatish Balay } 13315a838052SSatish Balay 13325615d1e5SSatish Balay #undef __FUNC__ 13335615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1334ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 133558667388SSatish Balay { 133658667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 133758667388SSatish Balay int ierr; 1338d64ed03dSBarry Smith 1339d64ed03dSBarry Smith PetscFunctionBegin; 134058667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 134158667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 13423a40ed3dSBarry Smith PetscFunctionReturn(0); 134358667388SSatish Balay } 13440ac07820SSatish Balay 13455615d1e5SSatish Balay #undef __FUNC__ 13465615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1347ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 13480ac07820SSatish Balay { 13494e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 13504e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 13517d57db60SLois Curfman McInnes int ierr; 13527d57db60SLois Curfman McInnes double isend[5], irecv[5]; 13530ac07820SSatish Balay 1354d64ed03dSBarry Smith PetscFunctionBegin; 13554e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 13564e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 13570e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1358de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 13594e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 13600e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1361de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 13620ac07820SSatish Balay if (flag == MAT_LOCAL) { 13634e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 13644e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 13654e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 13664e220ebcSLois Curfman McInnes info->memory = isend[3]; 13674e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 13680ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1369f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 13704e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13714e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13724e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13734e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13744e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13750ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1376f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 13774e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13784e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13794e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13804e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13814e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1382d41123aaSBarry Smith } else { 1383d41123aaSBarry Smith SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 13840ac07820SSatish Balay } 13855a5d4f66SBarry Smith info->rows_global = (double)a->M; 13865a5d4f66SBarry Smith info->columns_global = (double)a->N; 13875a5d4f66SBarry Smith info->rows_local = (double)a->m; 13885a5d4f66SBarry Smith info->columns_local = (double)a->N; 13894e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 13904e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 13914e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 13923a40ed3dSBarry Smith PetscFunctionReturn(0); 13930ac07820SSatish Balay } 13940ac07820SSatish Balay 13955615d1e5SSatish Balay #undef __FUNC__ 13965615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1397ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 139858667388SSatish Balay { 139958667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 140098305bb5SBarry Smith int ierr; 140158667388SSatish Balay 1402d64ed03dSBarry Smith PetscFunctionBegin; 140358667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 140458667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 14056da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1406c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 14074787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 14084787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 140998305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 141098305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1411b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1412aeafbbfcSLois Curfman McInnes a->roworiented = 1; 141398305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 141498305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1415b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 14166da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 141758667388SSatish Balay op == MAT_SYMMETRIC || 141858667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1419b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 142098305bb5SBarry Smith op == MAT_USE_HASH_TABLE) { 142158667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 142298305bb5SBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 142358667388SSatish Balay a->roworiented = 0; 142498305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 142598305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 14262b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 142790f02eecSBarry Smith a->donotstash = 1; 1428d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1429d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1430133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 1431133cdb44SSatish Balay a->ht_flag = 1; 1432d64ed03dSBarry Smith } else { 1433d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1434d64ed03dSBarry Smith } 14353a40ed3dSBarry Smith PetscFunctionReturn(0); 143658667388SSatish Balay } 143758667388SSatish Balay 14385615d1e5SSatish Balay #undef __FUNC__ 14395615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1440ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 14410ac07820SSatish Balay { 14420ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 14430ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 14440ac07820SSatish Balay Mat B; 144540011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 14460ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 14470ac07820SSatish Balay Scalar *a; 14480ac07820SSatish Balay 1449d64ed03dSBarry Smith PetscFunctionBegin; 1450a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 145128937c7bSBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 14520ac07820SSatish Balay CHKERRQ(ierr); 14530ac07820SSatish Balay 14540ac07820SSatish Balay /* copy over the A part */ 14550ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 14560ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14570ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 14580ac07820SSatish Balay 14590ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14600ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14610ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14620ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14630ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 14640ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14650ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14660ac07820SSatish Balay col++; a += bs; 14670ac07820SSatish Balay } 14680ac07820SSatish Balay } 14690ac07820SSatish Balay } 14700ac07820SSatish Balay /* copy over the B part */ 14710ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 14720ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14730ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14740ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14750ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14760ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14770ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 14780ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14790ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14800ac07820SSatish Balay col++; a += bs; 14810ac07820SSatish Balay } 14820ac07820SSatish Balay } 14830ac07820SSatish Balay } 14840ac07820SSatish Balay PetscFree(rvals); 14850ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14860ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14870ac07820SSatish Balay 14880ac07820SSatish Balay if (matout != PETSC_NULL) { 14890ac07820SSatish Balay *matout = B; 14900ac07820SSatish Balay } else { 1491f830108cSBarry Smith PetscOps *Abops; 1492cc2dc46cSBarry Smith MatOps Aops; 1493f830108cSBarry Smith 14940ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 14950ac07820SSatish Balay PetscFree(baij->rowners); 14960ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 14970ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1498b1fc9764SSatish Balay #if defined (USE_CTABLE) 1499b1fc9764SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 1500b1fc9764SSatish Balay #else 15010ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 1502b1fc9764SSatish Balay #endif 15030ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 15040ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 15050ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 15060ac07820SSatish Balay PetscFree(baij); 1507f830108cSBarry Smith 1508f830108cSBarry Smith /* 1509f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1510f830108cSBarry Smith A pointers for the bops and ops but copy everything 1511f830108cSBarry Smith else from C. 1512f830108cSBarry Smith */ 1513f830108cSBarry Smith Abops = A->bops; 1514f830108cSBarry Smith Aops = A->ops; 1515f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1516f830108cSBarry Smith A->bops = Abops; 1517f830108cSBarry Smith A->ops = Aops; 1518f830108cSBarry Smith 15190ac07820SSatish Balay PetscHeaderDestroy(B); 15200ac07820SSatish Balay } 15213a40ed3dSBarry Smith PetscFunctionReturn(0); 15220ac07820SSatish Balay } 15230e95ebc0SSatish Balay 15245615d1e5SSatish Balay #undef __FUNC__ 15255615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 15260e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 15270e95ebc0SSatish Balay { 15280e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 15290e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 15300e95ebc0SSatish Balay int ierr,s1,s2,s3; 15310e95ebc0SSatish Balay 1532d64ed03dSBarry Smith PetscFunctionBegin; 15330e95ebc0SSatish Balay if (ll) { 15340e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 15350e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1536a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 15370e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 15380e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 15390e95ebc0SSatish Balay } 1540a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 15413a40ed3dSBarry Smith PetscFunctionReturn(0); 15420e95ebc0SSatish Balay } 15430e95ebc0SSatish Balay 15445615d1e5SSatish Balay #undef __FUNC__ 15455615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1546ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 15470ac07820SSatish Balay { 15480ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 15490ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1550a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 15510ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 15520ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1553a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 15540ac07820SSatish Balay MPI_Comm comm = A->comm; 15550ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 15560ac07820SSatish Balay MPI_Status recv_status,*send_status; 15570ac07820SSatish Balay IS istmp; 15580ac07820SSatish Balay 1559d64ed03dSBarry Smith PetscFunctionBegin; 15600ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 15610ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 15620ac07820SSatish Balay 15630ac07820SSatish Balay /* first count number of contributors to each processor */ 15640ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 15650ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 15660ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 15670ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15680ac07820SSatish Balay idx = rows[i]; 15690ac07820SSatish Balay found = 0; 15700ac07820SSatish Balay for ( j=0; j<size; j++ ) { 15710ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 15720ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 15730ac07820SSatish Balay } 15740ac07820SSatish Balay } 1575a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 15760ac07820SSatish Balay } 15770ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 15780ac07820SSatish Balay 15790ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 15800ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1581ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 15820ac07820SSatish Balay nrecvs = work[rank]; 1583ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 15840ac07820SSatish Balay nmax = work[rank]; 15850ac07820SSatish Balay PetscFree(work); 15860ac07820SSatish Balay 15870ac07820SSatish Balay /* post receives: */ 1588d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1589d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 15900ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1591ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 15920ac07820SSatish Balay } 15930ac07820SSatish Balay 15940ac07820SSatish Balay /* do sends: 15950ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 15960ac07820SSatish Balay the ith processor 15970ac07820SSatish Balay */ 15980ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1599ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 16000ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 16010ac07820SSatish Balay starts[0] = 0; 16020ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 16030ac07820SSatish Balay for ( i=0; i<N; i++ ) { 16040ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 16050ac07820SSatish Balay } 16060ac07820SSatish Balay ISRestoreIndices(is,&rows); 16070ac07820SSatish Balay 16080ac07820SSatish Balay starts[0] = 0; 16090ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 16100ac07820SSatish Balay count = 0; 16110ac07820SSatish Balay for ( i=0; i<size; i++ ) { 16120ac07820SSatish Balay if (procs[i]) { 1613ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 16140ac07820SSatish Balay } 16150ac07820SSatish Balay } 16160ac07820SSatish Balay PetscFree(starts); 16170ac07820SSatish Balay 16180ac07820SSatish Balay base = owners[rank]*bs; 16190ac07820SSatish Balay 16200ac07820SSatish Balay /* wait on receives */ 16210ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 16220ac07820SSatish Balay source = lens + nrecvs; 16230ac07820SSatish Balay count = nrecvs; slen = 0; 16240ac07820SSatish Balay while (count) { 1625ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 16260ac07820SSatish Balay /* unpack receives into our local space */ 1627ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 16280ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 16290ac07820SSatish Balay lens[imdex] = n; 16300ac07820SSatish Balay slen += n; 16310ac07820SSatish Balay count--; 16320ac07820SSatish Balay } 16330ac07820SSatish Balay PetscFree(recv_waits); 16340ac07820SSatish Balay 16350ac07820SSatish Balay /* move the data into the send scatter */ 16360ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 16370ac07820SSatish Balay count = 0; 16380ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 16390ac07820SSatish Balay values = rvalues + i*nmax; 16400ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 16410ac07820SSatish Balay lrows[count++] = values[j] - base; 16420ac07820SSatish Balay } 16430ac07820SSatish Balay } 16440ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 16450ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 16460ac07820SSatish Balay 16470ac07820SSatish Balay /* actually zap the local rows */ 1648029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 16490ac07820SSatish Balay PLogObjectParent(A,istmp); 1650a07cd24cSSatish Balay 165172dacd9aSBarry Smith /* 165272dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 165372dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 165472dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 165572dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 165672dacd9aSBarry Smith 165772dacd9aSBarry Smith Contributed by: Mathew Knepley 165872dacd9aSBarry Smith */ 16599c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 16600ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 16619c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 16629c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 16639c957beeSSatish Balay } else if (diag) { 16649c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1665fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1666fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1667fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 16686525c446SSatish Balay } 1669a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1670a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1671a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1672a07cd24cSSatish Balay } 1673a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1674a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16759c957beeSSatish Balay } else { 16769c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1677a07cd24cSSatish Balay } 16789c957beeSSatish Balay 16799c957beeSSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 1680a07cd24cSSatish Balay PetscFree(lrows); 1681a07cd24cSSatish Balay 16820ac07820SSatish Balay /* wait on sends */ 16830ac07820SSatish Balay if (nsends) { 1684d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1685ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 16860ac07820SSatish Balay PetscFree(send_status); 16870ac07820SSatish Balay } 16880ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 16890ac07820SSatish Balay 16903a40ed3dSBarry Smith PetscFunctionReturn(0); 16910ac07820SSatish Balay } 169272dacd9aSBarry Smith 16935615d1e5SSatish Balay #undef __FUNC__ 16945615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1695ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1696ba4ca20aSSatish Balay { 1697ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 169825fdafccSSatish Balay MPI_Comm comm = A->comm; 1699133cdb44SSatish Balay static int called = 0; 17003a40ed3dSBarry Smith int ierr; 1701ba4ca20aSSatish Balay 1702d64ed03dSBarry Smith PetscFunctionBegin; 17033a40ed3dSBarry Smith if (!a->rank) { 17043a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 170525fdafccSSatish Balay } 170625fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1707133cdb44SSatish Balay (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1708133cdb44SSatish Balay (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 17093a40ed3dSBarry Smith PetscFunctionReturn(0); 1710ba4ca20aSSatish Balay } 17110ac07820SSatish Balay 17125615d1e5SSatish Balay #undef __FUNC__ 17135615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1714ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1715bb5a7306SBarry Smith { 1716bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1717bb5a7306SBarry Smith int ierr; 1718d64ed03dSBarry Smith 1719d64ed03dSBarry Smith PetscFunctionBegin; 1720bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 17213a40ed3dSBarry Smith PetscFunctionReturn(0); 1722bb5a7306SBarry Smith } 1723bb5a7306SBarry Smith 17242e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 17250ac07820SSatish Balay 172679bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1727cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1728cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1729cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1730cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1731cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1732cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 1733cc2dc46cSBarry Smith MatMultTrans_MPIBAIJ, 1734cc2dc46cSBarry Smith MatMultTransAdd_MPIBAIJ, 1735cc2dc46cSBarry Smith 0, 1736cc2dc46cSBarry Smith 0, 1737cc2dc46cSBarry Smith 0, 1738cc2dc46cSBarry Smith 0, 1739cc2dc46cSBarry Smith 0, 1740cc2dc46cSBarry Smith 0, 1741cc2dc46cSBarry Smith 0, 1742cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1743cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 1744cc2dc46cSBarry Smith 0, 1745cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1746cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1747cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1748cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1749cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1750cc2dc46cSBarry Smith 0, 1751cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1752cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1753cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1754cc2dc46cSBarry Smith 0, 1755cc2dc46cSBarry Smith 0, 1756cc2dc46cSBarry Smith 0, 1757cc2dc46cSBarry Smith 0, 1758cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1759cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1760cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1761cc2dc46cSBarry Smith 0, 1762cc2dc46cSBarry Smith 0, 1763cc2dc46cSBarry Smith 0, 1764cc2dc46cSBarry Smith 0, 17652e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1766cc2dc46cSBarry Smith 0, 1767cc2dc46cSBarry Smith 0, 1768cc2dc46cSBarry Smith 0, 1769cc2dc46cSBarry Smith 0, 1770cc2dc46cSBarry Smith 0, 1771cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1772cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1773cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1774cc2dc46cSBarry Smith 0, 1775cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1776cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1777cc2dc46cSBarry Smith 0, 1778cc2dc46cSBarry Smith 0, 1779cc2dc46cSBarry Smith 0, 1780cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1781cc2dc46cSBarry Smith 0, 1782cc2dc46cSBarry Smith 0, 1783cc2dc46cSBarry Smith 0, 1784cc2dc46cSBarry Smith 0, 1785cc2dc46cSBarry Smith 0, 1786cc2dc46cSBarry Smith 0, 1787cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1788cc2dc46cSBarry Smith 0, 1789cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1790cc2dc46cSBarry Smith 0, 1791cc2dc46cSBarry Smith 0, 1792cc2dc46cSBarry Smith 0, 1793cc2dc46cSBarry Smith MatGetMaps_Petsc}; 179479bdfe76SSatish Balay 17955ef9f2a5SBarry Smith 1796e18c124aSSatish Balay EXTERN_C_BEGIN 17975ef9f2a5SBarry Smith #undef __FUNC__ 17985ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 17995ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 18005ef9f2a5SBarry Smith { 18015ef9f2a5SBarry Smith PetscFunctionBegin; 18025ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 18035ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 18045ef9f2a5SBarry Smith PetscFunctionReturn(0); 18055ef9f2a5SBarry Smith } 1806e18c124aSSatish Balay EXTERN_C_END 180779bdfe76SSatish Balay 18085615d1e5SSatish Balay #undef __FUNC__ 18095615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 181079bdfe76SSatish Balay /*@C 181179bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 181279bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 181379bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 181479bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 181579bdfe76SSatish Balay performance can be increased by more than a factor of 50. 181679bdfe76SSatish Balay 1817db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1818db81eaa0SLois Curfman McInnes 181979bdfe76SSatish Balay Input Parameters: 1820db81eaa0SLois Curfman McInnes + comm - MPI communicator 182179bdfe76SSatish Balay . bs - size of blockk 182279bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 182392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 182492e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 182592e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 182692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 182792e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1828be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1829be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 183079bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 183179bdfe76SSatish Balay submatrix (same for all local rows) 183292e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 183392e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 1834db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 183592e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 183679bdfe76SSatish Balay submatrix (same for all local rows). 1837db81eaa0SLois Curfman McInnes - o_nzz - array containing the number of nonzeros in the various block rows of the 183892e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 183992e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 184079bdfe76SSatish Balay 184179bdfe76SSatish Balay Output Parameter: 184279bdfe76SSatish Balay . A - the matrix 184379bdfe76SSatish Balay 1844db81eaa0SLois Curfman McInnes Options Database Keys: 1845db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 1846db81eaa0SLois Curfman McInnes block calculations (much slower) 1847db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 1848494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1849494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 18503ffaccefSLois Curfman McInnes 1851b259b22eSLois Curfman McInnes Notes: 185279bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 185379bdfe76SSatish Balay (possibly both). 185479bdfe76SSatish Balay 1855be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1856be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 1857be79a94dSBarry Smith 185879bdfe76SSatish Balay Storage Information: 185979bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 186079bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 186179bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 186279bdfe76SSatish Balay local matrix (a rectangular submatrix). 186379bdfe76SSatish Balay 186479bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 186579bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 186679bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 186779bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 186879bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 186979bdfe76SSatish Balay 187079bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 187179bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 187279bdfe76SSatish Balay 1873db81eaa0SLois Curfman McInnes .vb 1874db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 1875db81eaa0SLois Curfman McInnes ------------------- 1876db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 1877db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 1878db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 1879db81eaa0SLois Curfman McInnes ------------------- 1880db81eaa0SLois Curfman McInnes .ve 188179bdfe76SSatish Balay 188279bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 188379bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 188479bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 188557b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 188679bdfe76SSatish Balay 1887d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1888d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 188979bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 189092e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 189192e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 18926da5968aSLois Curfman McInnes matrices. 189379bdfe76SSatish Balay 1894027ccd11SLois Curfman McInnes Level: intermediate 1895027ccd11SLois Curfman McInnes 189692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 189779bdfe76SSatish Balay 1898db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 189979bdfe76SSatish Balay @*/ 190079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 190179bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 190279bdfe76SSatish Balay { 190379bdfe76SSatish Balay Mat B; 190479bdfe76SSatish Balay Mat_MPIBAIJ *b; 1905133cdb44SSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1906a2ab621fSBarry Smith int flag1 = 0,flag2 = 0; 190779bdfe76SSatish Balay 1908d64ed03dSBarry Smith PetscFunctionBegin; 1909a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 19103914022bSBarry Smith 19113914022bSBarry Smith MPI_Comm_size(comm,&size); 1912494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1913494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1914494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 19153914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 19163914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 19173914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 19183a40ed3dSBarry Smith PetscFunctionReturn(0); 19193914022bSBarry Smith } 19203914022bSBarry Smith 192179bdfe76SSatish Balay *A = 0; 19223f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 192379bdfe76SSatish Balay PLogObjectCreate(B); 192479bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 192579bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1926cc2dc46cSBarry Smith PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 19274c50302cSBarry Smith 1928e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 1929e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 193090f02eecSBarry Smith B->mapping = 0; 193179bdfe76SSatish Balay B->factor = 0; 193279bdfe76SSatish Balay B->assembled = PETSC_FALSE; 193379bdfe76SSatish Balay 1934e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 193579bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 193679bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 193779bdfe76SSatish Balay 1938d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1939a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1940d64ed03dSBarry Smith } 1941a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1942a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1943a8c6a408SBarry Smith } 1944a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1945a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1946a8c6a408SBarry Smith } 1947cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1948cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 194979bdfe76SSatish Balay 195079bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 195179bdfe76SSatish Balay work[0] = m; work[1] = n; 195279bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 1953ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 195479bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 195579bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 195679bdfe76SSatish Balay } 195779bdfe76SSatish Balay if (m == PETSC_DECIDE) { 195879bdfe76SSatish Balay Mbs = M/bs; 1959a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 196079bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 196179bdfe76SSatish Balay m = mbs*bs; 196279bdfe76SSatish Balay } 196379bdfe76SSatish Balay if (n == PETSC_DECIDE) { 196479bdfe76SSatish Balay Nbs = N/bs; 1965a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 196679bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 196779bdfe76SSatish Balay n = nbs*bs; 196879bdfe76SSatish Balay } 1969a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 1970a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1971a8c6a408SBarry Smith } 197279bdfe76SSatish Balay 197379bdfe76SSatish Balay b->m = m; B->m = m; 197479bdfe76SSatish Balay b->n = n; B->n = n; 197579bdfe76SSatish Balay b->N = N; B->N = N; 197679bdfe76SSatish Balay b->M = M; B->M = M; 197779bdfe76SSatish Balay b->bs = bs; 197879bdfe76SSatish Balay b->bs2 = bs*bs; 197979bdfe76SSatish Balay b->mbs = mbs; 198079bdfe76SSatish Balay b->nbs = nbs; 198179bdfe76SSatish Balay b->Mbs = Mbs; 198279bdfe76SSatish Balay b->Nbs = Nbs; 198379bdfe76SSatish Balay 1984c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 1985c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 1986488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1987488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1988c7fcc2eaSBarry Smith 198979bdfe76SSatish Balay /* build local table of row and column ownerships */ 1990ff2fd236SBarry Smith b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1991ff2fd236SBarry Smith PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 19920ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 1993ff2fd236SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 1994ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 199579bdfe76SSatish Balay b->rowners[0] = 0; 199679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 199779bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 199879bdfe76SSatish Balay } 1999ff2fd236SBarry Smith for ( i=0; i<=b->size; i++ ) { 2000ff2fd236SBarry Smith b->rowners_bs[i] = b->rowners[i]*bs; 2001ff2fd236SBarry Smith } 200279bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 200379bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 20044fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 20054fa0d573SSatish Balay b->rend_bs = b->rend * bs; 20064fa0d573SSatish Balay 2007ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 200879bdfe76SSatish Balay b->cowners[0] = 0; 200979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 201079bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 201179bdfe76SSatish Balay } 201279bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 201379bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 20144fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 20154fa0d573SSatish Balay b->cend_bs = b->cend * bs; 201679bdfe76SSatish Balay 2017a07cd24cSSatish Balay 201879bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2019029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 202079bdfe76SSatish Balay PLogObjectParent(B,b->A); 202179bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2022029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 202379bdfe76SSatish Balay PLogObjectParent(B,b->B); 202479bdfe76SSatish Balay 202579bdfe76SSatish Balay /* build cache for off array entries formed */ 20268798bf22SSatish Balay ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr); 20278798bf22SSatish Balay ierr = MatStashCreate_Private(comm,bs,&B->bstash); CHKERRQ(ierr); 202890f02eecSBarry Smith b->donotstash = 0; 202979bdfe76SSatish Balay b->colmap = 0; 203079bdfe76SSatish Balay b->garray = 0; 203179bdfe76SSatish Balay b->roworiented = 1; 203279bdfe76SSatish Balay 203330793edcSSatish Balay /* stuff used in block assembly */ 203430793edcSSatish Balay b->barray = 0; 203530793edcSSatish Balay 203679bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 203779bdfe76SSatish Balay b->lvec = 0; 203879bdfe76SSatish Balay b->Mvctx = 0; 203979bdfe76SSatish Balay 204079bdfe76SSatish Balay /* stuff for MatGetRow() */ 204179bdfe76SSatish Balay b->rowindices = 0; 204279bdfe76SSatish Balay b->rowvalues = 0; 204379bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 204479bdfe76SSatish Balay 2045a07cd24cSSatish Balay /* hash table stuff */ 2046a07cd24cSSatish Balay b->ht = 0; 2047187ce0cbSSatish Balay b->hd = 0; 20480bdbc534SSatish Balay b->ht_size = 0; 2049133cdb44SSatish Balay b->ht_flag = 0; 205025fdafccSSatish Balay b->ht_fact = 0; 2051187ce0cbSSatish Balay b->ht_total_ct = 0; 2052187ce0cbSSatish Balay b->ht_insert_ct = 0; 2053a07cd24cSSatish Balay 205479bdfe76SSatish Balay *A = B; 2055133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2056133cdb44SSatish Balay if (flg) { 2057133cdb44SSatish Balay double fact = 1.39; 2058133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2059133cdb44SSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2060133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2061133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2062133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2063133cdb44SSatish Balay } 20645ef9f2a5SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 20655ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 20665ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 20673a40ed3dSBarry Smith PetscFunctionReturn(0); 206879bdfe76SSatish Balay } 2069026e39d0SSatish Balay 20705615d1e5SSatish Balay #undef __FUNC__ 20712e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ" 20722e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 20730ac07820SSatish Balay { 20740ac07820SSatish Balay Mat mat; 20750ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 20760ac07820SSatish Balay int ierr, len=0, flg; 20770ac07820SSatish Balay 2078d64ed03dSBarry Smith PetscFunctionBegin; 20790ac07820SSatish Balay *newmat = 0; 20803f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 20810ac07820SSatish Balay PLogObjectCreate(mat); 20820ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2083cc2dc46cSBarry Smith PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2084e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2085e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 20860ac07820SSatish Balay mat->factor = matin->factor; 20870ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2088aef5e8e0SSatish Balay mat->insertmode = NOT_SET_VALUES; 20890ac07820SSatish Balay 20900ac07820SSatish Balay a->m = mat->m = oldmat->m; 20910ac07820SSatish Balay a->n = mat->n = oldmat->n; 20920ac07820SSatish Balay a->M = mat->M = oldmat->M; 20930ac07820SSatish Balay a->N = mat->N = oldmat->N; 20940ac07820SSatish Balay 20950ac07820SSatish Balay a->bs = oldmat->bs; 20960ac07820SSatish Balay a->bs2 = oldmat->bs2; 20970ac07820SSatish Balay a->mbs = oldmat->mbs; 20980ac07820SSatish Balay a->nbs = oldmat->nbs; 20990ac07820SSatish Balay a->Mbs = oldmat->Mbs; 21000ac07820SSatish Balay a->Nbs = oldmat->Nbs; 21010ac07820SSatish Balay 21020ac07820SSatish Balay a->rstart = oldmat->rstart; 21030ac07820SSatish Balay a->rend = oldmat->rend; 21040ac07820SSatish Balay a->cstart = oldmat->cstart; 21050ac07820SSatish Balay a->cend = oldmat->cend; 21060ac07820SSatish Balay a->size = oldmat->size; 21070ac07820SSatish Balay a->rank = oldmat->rank; 2108aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2109aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2110aef5e8e0SSatish Balay a->rowindices = 0; 21110ac07820SSatish Balay a->rowvalues = 0; 21120ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 211330793edcSSatish Balay a->barray = 0; 21143123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 21153123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 21163123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 21173123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 21180ac07820SSatish Balay 2119133cdb44SSatish Balay /* hash table stuff */ 2120133cdb44SSatish Balay a->ht = 0; 2121133cdb44SSatish Balay a->hd = 0; 2122133cdb44SSatish Balay a->ht_size = 0; 2123133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 212425fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2125133cdb44SSatish Balay a->ht_total_ct = 0; 2126133cdb44SSatish Balay a->ht_insert_ct = 0; 2127133cdb44SSatish Balay 2128133cdb44SSatish Balay 2129ff2fd236SBarry Smith a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2130ff2fd236SBarry Smith PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 21310ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 2132ff2fd236SBarry Smith a->rowners_bs = a->cowners + a->size + 2; 2133ff2fd236SBarry Smith PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int)); 21348798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr); 21358798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash); CHKERRQ(ierr); 21360ac07820SSatish Balay if (oldmat->colmap) { 213748e59246SSatish Balay #if defined (USE_CTABLE) 2138fa46199cSSatish Balay ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 213948e59246SSatish Balay #else 21400ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 21410ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 21420ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 214348e59246SSatish Balay #endif 21440ac07820SSatish Balay } else a->colmap = 0; 21450ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 21460ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 21470ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 21480ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 21490ac07820SSatish Balay } else a->garray = 0; 21500ac07820SSatish Balay 21510ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 21520ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 21530ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2154e18c124aSSatish Balay 21550ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 21562e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 21570ac07820SSatish Balay PLogObjectParent(mat,a->A); 21582e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 21590ac07820SSatish Balay PLogObjectParent(mat,a->B); 21600ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2161e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 21620ac07820SSatish Balay if (flg) { 21630ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 21640ac07820SSatish Balay } 21650ac07820SSatish Balay *newmat = mat; 21663a40ed3dSBarry Smith PetscFunctionReturn(0); 21670ac07820SSatish Balay } 216857b952d6SSatish Balay 216957b952d6SSatish Balay #include "sys.h" 217057b952d6SSatish Balay 21715615d1e5SSatish Balay #undef __FUNC__ 21725615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 217357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 217457b952d6SSatish Balay { 217557b952d6SSatish Balay Mat A; 217657b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 217757b952d6SSatish Balay Scalar *vals,*buf; 217857b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 217957b952d6SSatish Balay MPI_Status status; 2180cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 218157b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 218240011551SBarry Smith int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 218357b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 218457b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 218557b952d6SSatish Balay 2186d64ed03dSBarry Smith PetscFunctionBegin; 218757b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 218857b952d6SSatish Balay 218957b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 219057b952d6SSatish Balay if (!rank) { 219157b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2192e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2193a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2194d64ed03dSBarry Smith if (header[3] < 0) { 2195a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2196d64ed03dSBarry Smith } 21976c5fab8fSBarry Smith } 2198d64ed03dSBarry Smith 2199ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 220057b952d6SSatish Balay M = header[1]; N = header[2]; 220157b952d6SSatish Balay 2202a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 220357b952d6SSatish Balay 220457b952d6SSatish Balay /* 220557b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 220657b952d6SSatish Balay divisible by the blocksize 220757b952d6SSatish Balay */ 220857b952d6SSatish Balay Mbs = M/bs; 220957b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 221057b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 221157b952d6SSatish Balay else Mbs++; 221257b952d6SSatish Balay if (extra_rows &&!rank) { 2213b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 221457b952d6SSatish Balay } 2215537820f0SBarry Smith 221657b952d6SSatish Balay /* determine ownership of all rows */ 221757b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 221857b952d6SSatish Balay m = mbs * bs; 2219cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2220cee3aa6bSSatish Balay browners = rowners + size + 1; 2221ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 222257b952d6SSatish Balay rowners[0] = 0; 2223cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2224cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 222557b952d6SSatish Balay rstart = rowners[rank]; 222657b952d6SSatish Balay rend = rowners[rank+1]; 222757b952d6SSatish Balay 222857b952d6SSatish Balay /* distribute row lengths to all processors */ 222957b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 223057b952d6SSatish Balay if (!rank) { 223157b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2232e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 223357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 223457b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2235cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2236ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 223757b952d6SSatish Balay PetscFree(sndcounts); 2238d64ed03dSBarry Smith } else { 2239ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 224057b952d6SSatish Balay } 224157b952d6SSatish Balay 224257b952d6SSatish Balay if (!rank) { 224357b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 224457b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 224557b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 224657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 224757b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 224857b952d6SSatish Balay procsnz[i] += rowlengths[j]; 224957b952d6SSatish Balay } 225057b952d6SSatish Balay } 225157b952d6SSatish Balay PetscFree(rowlengths); 225257b952d6SSatish Balay 225357b952d6SSatish Balay /* determine max buffer needed and allocate it */ 225457b952d6SSatish Balay maxnz = 0; 225557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 225657b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 225757b952d6SSatish Balay } 225857b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 225957b952d6SSatish Balay 226057b952d6SSatish Balay /* read in my part of the matrix column indices */ 226157b952d6SSatish Balay nz = procsnz[0]; 226257b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 226357b952d6SSatish Balay mycols = ibuf; 2264cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2265e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2266cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2267cee3aa6bSSatish Balay 226857b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 226957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 227057b952d6SSatish Balay nz = procsnz[i]; 2271e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2272ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 227357b952d6SSatish Balay } 227457b952d6SSatish Balay /* read in the stuff for the last proc */ 227557b952d6SSatish Balay if ( size != 1 ) { 227657b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2277e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 227857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2279ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 228057b952d6SSatish Balay } 228157b952d6SSatish Balay PetscFree(cols); 2282d64ed03dSBarry Smith } else { 228357b952d6SSatish Balay /* determine buffer space needed for message */ 228457b952d6SSatish Balay nz = 0; 228557b952d6SSatish Balay for ( i=0; i<m; i++ ) { 228657b952d6SSatish Balay nz += locrowlens[i]; 228757b952d6SSatish Balay } 228857b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 228957b952d6SSatish Balay mycols = ibuf; 229057b952d6SSatish Balay /* receive message of column indices*/ 2291ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2292ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2293a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 229457b952d6SSatish Balay } 229557b952d6SSatish Balay 229657b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2297cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2298cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 229957b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2300cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 230157b952d6SSatish Balay masked1 = mask + Mbs; 230257b952d6SSatish Balay masked2 = masked1 + Mbs; 230357b952d6SSatish Balay rowcount = 0; nzcount = 0; 230457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 230557b952d6SSatish Balay dcount = 0; 230657b952d6SSatish Balay odcount = 0; 230757b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 230857b952d6SSatish Balay kmax = locrowlens[rowcount]; 230957b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 231057b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 231157b952d6SSatish Balay if (!mask[tmp]) { 231257b952d6SSatish Balay mask[tmp] = 1; 231357b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 231457b952d6SSatish Balay else masked1[dcount++] = tmp; 231557b952d6SSatish Balay } 231657b952d6SSatish Balay } 231757b952d6SSatish Balay rowcount++; 231857b952d6SSatish Balay } 2319cee3aa6bSSatish Balay 232057b952d6SSatish Balay dlens[i] = dcount; 232157b952d6SSatish Balay odlens[i] = odcount; 2322cee3aa6bSSatish Balay 232357b952d6SSatish Balay /* zero out the mask elements we set */ 232457b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 232557b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 232657b952d6SSatish Balay } 2327cee3aa6bSSatish Balay 232857b952d6SSatish Balay /* create our matrix */ 2329537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2330537820f0SBarry Smith CHKERRQ(ierr); 233157b952d6SSatish Balay A = *newmat; 23326d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 233357b952d6SSatish Balay 233457b952d6SSatish Balay if (!rank) { 233557b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 233657b952d6SSatish Balay /* read in my part of the matrix numerical values */ 233757b952d6SSatish Balay nz = procsnz[0]; 233857b952d6SSatish Balay vals = buf; 2339cee3aa6bSSatish Balay mycols = ibuf; 2340cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2341e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2342cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2343537820f0SBarry Smith 234457b952d6SSatish Balay /* insert into matrix */ 234557b952d6SSatish Balay jj = rstart*bs; 234657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 234757b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 234857b952d6SSatish Balay mycols += locrowlens[i]; 234957b952d6SSatish Balay vals += locrowlens[i]; 235057b952d6SSatish Balay jj++; 235157b952d6SSatish Balay } 235257b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 235357b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 235457b952d6SSatish Balay nz = procsnz[i]; 235557b952d6SSatish Balay vals = buf; 2356e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2357ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 235857b952d6SSatish Balay } 235957b952d6SSatish Balay /* the last proc */ 236057b952d6SSatish Balay if ( size != 1 ){ 236157b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2362cee3aa6bSSatish Balay vals = buf; 2363e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 236457b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2365ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 236657b952d6SSatish Balay } 236757b952d6SSatish Balay PetscFree(procsnz); 2368d64ed03dSBarry Smith } else { 236957b952d6SSatish Balay /* receive numeric values */ 237057b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 237157b952d6SSatish Balay 237257b952d6SSatish Balay /* receive message of values*/ 237357b952d6SSatish Balay vals = buf; 2374cee3aa6bSSatish Balay mycols = ibuf; 2375ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2376ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2377a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 237857b952d6SSatish Balay 237957b952d6SSatish Balay /* insert into matrix */ 238057b952d6SSatish Balay jj = rstart*bs; 2381cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 238257b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 238357b952d6SSatish Balay mycols += locrowlens[i]; 238457b952d6SSatish Balay vals += locrowlens[i]; 238557b952d6SSatish Balay jj++; 238657b952d6SSatish Balay } 238757b952d6SSatish Balay } 238857b952d6SSatish Balay PetscFree(locrowlens); 238957b952d6SSatish Balay PetscFree(buf); 239057b952d6SSatish Balay PetscFree(ibuf); 239157b952d6SSatish Balay PetscFree(rowners); 239257b952d6SSatish Balay PetscFree(dlens); 2393cee3aa6bSSatish Balay PetscFree(mask); 23946d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 23956d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 23963a40ed3dSBarry Smith PetscFunctionReturn(0); 239757b952d6SSatish Balay } 239857b952d6SSatish Balay 239957b952d6SSatish Balay 2400133cdb44SSatish Balay 2401133cdb44SSatish Balay #undef __FUNC__ 2402133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2403133cdb44SSatish Balay /*@ 2404133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2405133cdb44SSatish Balay 2406133cdb44SSatish Balay Input Parameters: 2407133cdb44SSatish Balay . mat - the matrix 2408133cdb44SSatish Balay . fact - factor 2409133cdb44SSatish Balay 2410fee21e36SBarry Smith Collective on Mat 2411fee21e36SBarry Smith 24128c890885SBarry Smith Level: advanced 24138c890885SBarry Smith 2414133cdb44SSatish Balay Notes: 2415133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2416133cdb44SSatish Balay 2417133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2418133cdb44SSatish Balay 2419133cdb44SSatish Balay .seealso: MatSetOption() 2420133cdb44SSatish Balay @*/ 2421133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2422133cdb44SSatish Balay { 242325fdafccSSatish Balay Mat_MPIBAIJ *baij; 2424133cdb44SSatish Balay 2425133cdb44SSatish Balay PetscFunctionBegin; 2426133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 242725fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2428133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2429133cdb44SSatish Balay } 2430133cdb44SSatish Balay baij = (Mat_MPIBAIJ*) mat->data; 2431133cdb44SSatish Balay baij->ht_fact = fact; 2432133cdb44SSatish Balay PetscFunctionReturn(0); 2433133cdb44SSatish Balay } 2434