1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*bbb85fb3SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.155 1999/02/26 17:08:24 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 117b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13*bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14*bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15*bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16*bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17*bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 183b2fbd54SBarry Smith 19537820f0SBarry Smith /* 20537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2157b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2257b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2357b952d6SSatish Balay length of colmap equals the global matrix length. 2457b952d6SSatish Balay */ 255615d1e5SSatish Balay #undef __FUNC__ 265615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2857b952d6SSatish Balay { 2957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3057b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 316dee3f7eSBarry Smith int nbs = B->nbs,i,bs=B->bs; 326dee3f7eSBarry Smith #if defined (USE_CTABLE) 336dee3f7eSBarry Smith int ierr; 346dee3f7eSBarry Smith #endif 3557b952d6SSatish Balay 36d64ed03dSBarry Smith PetscFunctionBegin; 3748e59246SSatish Balay #if defined (USE_CTABLE) 38fa46199cSSatish Balay ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr); 3948e59246SSatish Balay for ( i=0; i<nbs; i++ ){ 4048e59246SSatish Balay ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 4148e59246SSatish Balay } 4248e59246SSatish Balay #else 43758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 4457b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4557b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 46928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4748e59246SSatish Balay #endif 483a40ed3dSBarry Smith PetscFunctionReturn(0); 4957b952d6SSatish Balay } 5057b952d6SSatish Balay 5180c1aa95SSatish Balay #define CHUNKSIZE 10 5280c1aa95SSatish Balay 53f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 5480c1aa95SSatish Balay { \ 5580c1aa95SSatish Balay \ 5680c1aa95SSatish Balay brow = row/bs; \ 5780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 58ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 5980c1aa95SSatish Balay bcol = col/bs; \ 6080c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 61ab26458aSBarry Smith low = 0; high = nrow; \ 62ab26458aSBarry Smith while (high-low > 3) { \ 63ab26458aSBarry Smith t = (low+high)/2; \ 64ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 65ab26458aSBarry Smith else low = t; \ 66ab26458aSBarry Smith } \ 67ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 6880c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 6980c1aa95SSatish Balay if (rp[_i] == bcol) { \ 7080c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 71eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 72eada6651SSatish Balay else *bap = value; \ 73ac7a638eSSatish Balay goto a_noinsert; \ 7480c1aa95SSatish Balay } \ 7580c1aa95SSatish Balay } \ 7689280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 77a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 7880c1aa95SSatish Balay if (nrow >= rmax) { \ 7980c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 8080c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 8180c1aa95SSatish Balay Scalar *new_a; \ 8280c1aa95SSatish Balay \ 83a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 8489280ab3SLois Curfman McInnes \ 8580c1aa95SSatish Balay /* malloc new storage space */ \ 8680c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 8780c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 8880c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 8980c1aa95SSatish Balay new_i = new_j + new_nz; \ 9080c1aa95SSatish Balay \ 9180c1aa95SSatish Balay /* copy over old data into new slots */ \ 9280c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 9380c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 9480c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 9580c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 9680c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 9780c1aa95SSatish Balay len*sizeof(int)); \ 9880c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 9980c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 10080c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 10180c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 10280c1aa95SSatish Balay /* free up old matrix storage */ \ 10380c1aa95SSatish Balay PetscFree(a->a); \ 10480c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 10580c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 10680c1aa95SSatish Balay a->singlemalloc = 1; \ 10780c1aa95SSatish Balay \ 10880c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 11080c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 11180c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 11280c1aa95SSatish Balay a->reallocs++; \ 11380c1aa95SSatish Balay a->nz++; \ 11480c1aa95SSatish Balay } \ 11580c1aa95SSatish Balay N = nrow++ - 1; \ 11680c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 11780c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 11880c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 11980c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 12080c1aa95SSatish Balay } \ 12180c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 12280c1aa95SSatish Balay rp[_i] = bcol; \ 12380c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 124ac7a638eSSatish Balay a_noinsert:; \ 12580c1aa95SSatish Balay ailen[brow] = nrow; \ 12680c1aa95SSatish Balay } 12757b952d6SSatish Balay 128ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 129ac7a638eSSatish Balay { \ 130ac7a638eSSatish Balay \ 131ac7a638eSSatish Balay brow = row/bs; \ 132ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 133ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 134ac7a638eSSatish Balay bcol = col/bs; \ 135ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 136ac7a638eSSatish Balay low = 0; high = nrow; \ 137ac7a638eSSatish Balay while (high-low > 3) { \ 138ac7a638eSSatish Balay t = (low+high)/2; \ 139ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 140ac7a638eSSatish Balay else low = t; \ 141ac7a638eSSatish Balay } \ 142ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 143ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 144ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 145ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 146ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 147ac7a638eSSatish Balay else *bap = value; \ 148ac7a638eSSatish Balay goto b_noinsert; \ 149ac7a638eSSatish Balay } \ 150ac7a638eSSatish Balay } \ 15189280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 152a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 153ac7a638eSSatish Balay if (nrow >= rmax) { \ 154ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 15574c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156ac7a638eSSatish Balay Scalar *new_a; \ 157ac7a638eSSatish Balay \ 158a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 15989280ab3SLois Curfman McInnes \ 160ac7a638eSSatish Balay /* malloc new storage space */ \ 16174c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 162ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 163ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 164ac7a638eSSatish Balay new_i = new_j + new_nz; \ 165ac7a638eSSatish Balay \ 166ac7a638eSSatish Balay /* copy over old data into new slots */ \ 167ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 16874c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 169ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 170ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 171ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 172ac7a638eSSatish Balay len*sizeof(int)); \ 173ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 174ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 175ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 176ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 177ac7a638eSSatish Balay /* free up old matrix storage */ \ 17874c639caSSatish Balay PetscFree(b->a); \ 17974c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 18074c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 18174c639caSSatish Balay b->singlemalloc = 1; \ 182ac7a638eSSatish Balay \ 183ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 184ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 18574c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 18674c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 18774c639caSSatish Balay b->reallocs++; \ 18874c639caSSatish Balay b->nz++; \ 189ac7a638eSSatish Balay } \ 190ac7a638eSSatish Balay N = nrow++ - 1; \ 191ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 192ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 193ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 194ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 195ac7a638eSSatish Balay } \ 196ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 197ac7a638eSSatish Balay rp[_i] = bcol; \ 198ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 199ac7a638eSSatish Balay b_noinsert:; \ 200ac7a638eSSatish Balay bilen[brow] = nrow; \ 201ac7a638eSSatish Balay } 202ac7a638eSSatish Balay 2035615d1e5SSatish Balay #undef __FUNC__ 2045615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 205ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 20657b952d6SSatish Balay { 20757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 20857b952d6SSatish Balay Scalar value; 2094fa0d573SSatish Balay int ierr,i,j,row,col; 2104fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2114fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2124fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 21357b952d6SSatish Balay 214eada6651SSatish Balay /* Some Variables required in the macro */ 21580c1aa95SSatish Balay Mat A = baij->A; 21680c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 217ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 218ac7a638eSSatish Balay Scalar *aa=a->a; 219ac7a638eSSatish Balay 220ac7a638eSSatish Balay Mat B = baij->B; 221ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 222ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 223ac7a638eSSatish Balay Scalar *ba=b->a; 224ac7a638eSSatish Balay 225ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 226ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 227ac7a638eSSatish Balay Scalar *ap,*bap; 22880c1aa95SSatish Balay 229d64ed03dSBarry Smith PetscFunctionBegin; 23057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2315ef9f2a5SBarry Smith if (im[i] < 0) continue; 2323a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 233a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 234639f9d9dSBarry Smith #endif 23557b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 23657b952d6SSatish Balay row = im[i] - rstart_orig; 23757b952d6SSatish Balay for ( j=0; j<n; j++ ) { 23857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 23957b952d6SSatish Balay col = in[j] - cstart_orig; 24057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 241f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 24280c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 24357b952d6SSatish Balay } 2445ef9f2a5SBarry Smith else if (in[j] < 0) continue; 2453a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 246a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 247639f9d9dSBarry Smith #endif 24857b952d6SSatish Balay else { 24957b952d6SSatish Balay if (mat->was_assembled) { 250905e6a2fSBarry Smith if (!baij->colmap) { 251905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 252905e6a2fSBarry Smith } 25348e59246SSatish Balay #if defined (USE_CTABLE) 254fa46199cSSatish Balay ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr); 255fa46199cSSatish Balay col = col - 1 + in[j]%bs; 25648e59246SSatish Balay #else 257905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 25848e59246SSatish Balay #endif 25957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 26057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 26157b952d6SSatish Balay col = in[j]; 2629bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2639bf004c3SSatish Balay B = baij->B; 2649bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2659bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2669bf004c3SSatish Balay ba=b->a; 26757b952d6SSatish Balay } 268d64ed03dSBarry Smith } else col = in[j]; 26957b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 270ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 271ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 27257b952d6SSatish Balay } 27357b952d6SSatish Balay } 274d64ed03dSBarry Smith } else { 27590f02eecSBarry Smith if (roworiented && !baij->donotstash) { 27657b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 277d64ed03dSBarry Smith } else { 27890f02eecSBarry Smith if (!baij->donotstash) { 27957b952d6SSatish Balay row = im[i]; 28057b952d6SSatish Balay for ( j=0; j<n; j++ ) { 28157b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 28257b952d6SSatish Balay } 28357b952d6SSatish Balay } 28457b952d6SSatish Balay } 28557b952d6SSatish Balay } 28690f02eecSBarry Smith } 2873a40ed3dSBarry Smith PetscFunctionReturn(0); 28857b952d6SSatish Balay } 28957b952d6SSatish Balay 290ab26458aSBarry Smith #undef __FUNC__ 291ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 292ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 293ab26458aSBarry Smith { 294ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 29530793edcSSatish Balay Scalar *value,*barray=baij->barray; 296abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 297ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 298ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 299ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 300ab26458aSBarry Smith 30130793edcSSatish Balay if(!barray) { 30247513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 30330793edcSSatish Balay } 30430793edcSSatish Balay 305ab26458aSBarry Smith if (roworiented) { 306ab26458aSBarry Smith stepval = (n-1)*bs; 307ab26458aSBarry Smith } else { 308ab26458aSBarry Smith stepval = (m-1)*bs; 309ab26458aSBarry Smith } 310ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 3115ef9f2a5SBarry Smith if (im[i] < 0) continue; 3123a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3135ef9f2a5SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 314ab26458aSBarry Smith #endif 315ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 316ab26458aSBarry Smith row = im[i] - rstart; 317ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 31815b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 31915b57d14SSatish Balay if ((roworiented) && (n == 1)) { 32015b57d14SSatish Balay barray = v + i*bs2; 32115b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 32215b57d14SSatish Balay barray = v + j*bs2; 32315b57d14SSatish Balay } else { /* Here a copy is required */ 324ab26458aSBarry Smith if (roworiented) { 325ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 326ab26458aSBarry Smith } else { 327ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 328abef11f7SSatish Balay } 32947513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 33047513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 33130793edcSSatish Balay *barray++ = *value++; 33247513183SBarry Smith } 33347513183SBarry Smith } 33430793edcSSatish Balay barray -=bs2; 33515b57d14SSatish Balay } 336abef11f7SSatish Balay 337abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 338abef11f7SSatish Balay col = in[j] - cstart; 33930793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 340ab26458aSBarry Smith } 3415ef9f2a5SBarry Smith else if (in[j] < 0) continue; 3423a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 3435ef9f2a5SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 344ab26458aSBarry Smith #endif 345ab26458aSBarry Smith else { 346ab26458aSBarry Smith if (mat->was_assembled) { 347ab26458aSBarry Smith if (!baij->colmap) { 348ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 349ab26458aSBarry Smith } 350a5eb4965SSatish Balay 3513a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 35248e59246SSatish Balay #if defined (USE_CTABLE) 353fa46199cSSatish Balay { int data; 354fa46199cSSatish Balay ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr); 355fa46199cSSatish Balay if((data - 1) % bs) 35648e59246SSatish Balay {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 357fa46199cSSatish Balay } 35848e59246SSatish Balay #else 359a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 360a5eb4965SSatish Balay #endif 36148e59246SSatish Balay #endif 36248e59246SSatish Balay #if defined (USE_CTABLE) 363fa46199cSSatish Balay ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr); 364fa46199cSSatish Balay col = (col - 1)/bs; 36548e59246SSatish Balay #else 366a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 36748e59246SSatish Balay #endif 368ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 369ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 370ab26458aSBarry Smith col = in[j]; 371ab26458aSBarry Smith } 372ab26458aSBarry Smith } 373ab26458aSBarry Smith else col = in[j]; 37430793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 375ab26458aSBarry Smith } 376ab26458aSBarry Smith } 377d64ed03dSBarry Smith } else { 378ab26458aSBarry Smith if (!baij->donotstash) { 379ab26458aSBarry Smith if (roworiented ) { 380abef11f7SSatish Balay row = im[i]*bs; 381abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 382abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 383abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 384abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 385abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 386abef11f7SSatish Balay } 387ab26458aSBarry Smith } 388ab26458aSBarry Smith } 389d64ed03dSBarry Smith } else { 390ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 391abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 392abef11f7SSatish Balay col = in[j]*bs; 393abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 394abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 395abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 396ab26458aSBarry Smith } 397ab26458aSBarry Smith } 398ab26458aSBarry Smith } 399abef11f7SSatish Balay } 400abef11f7SSatish Balay } 401ab26458aSBarry Smith } 402ab26458aSBarry Smith } 4033a40ed3dSBarry Smith PetscFunctionReturn(0); 404ab26458aSBarry Smith } 4050bdbc534SSatish Balay #define HASH_KEY 0.6180339887 406c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 407c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 408c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 4095615d1e5SSatish Balay #undef __FUNC__ 4100bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 4110bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4120bdbc534SSatish Balay { 4130bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4140bdbc534SSatish Balay int ierr,i,j,row,col; 4150bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 416c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 417c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 418c2760754SSatish Balay double tmp; 419b9e4cc15SSatish Balay Scalar ** HD = baij->hd,value; 4204a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 4214a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4224a15367fSSatish Balay #endif 4230bdbc534SSatish Balay 4240bdbc534SSatish Balay PetscFunctionBegin; 4250bdbc534SSatish Balay 4260bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4270bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4280bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4290bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4300bdbc534SSatish Balay #endif 4310bdbc534SSatish Balay row = im[i]; 432c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4330bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4340bdbc534SSatish Balay col = in[j]; 4350bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4360bdbc534SSatish Balay /* Look up into the Hash Table */ 437c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 438c2760754SSatish Balay h1 = HASH(size,key,tmp); 4390bdbc534SSatish Balay 440c2760754SSatish Balay 441c2760754SSatish Balay idx = h1; 442187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 443187ce0cbSSatish Balay insert_ct++; 444187ce0cbSSatish Balay total_ct++; 445187ce0cbSSatish Balay if (HT[idx] != key) { 446187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 447187ce0cbSSatish Balay if (idx == size) { 448187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 449187ce0cbSSatish Balay if (idx == h1) { 450187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 451187ce0cbSSatish Balay } 452187ce0cbSSatish Balay } 453187ce0cbSSatish Balay } 454187ce0cbSSatish Balay #else 455c2760754SSatish Balay if (HT[idx] != key) { 456c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 457c2760754SSatish Balay if (idx == size) { 458c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 459c2760754SSatish Balay if (idx == h1) { 460c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 461c2760754SSatish Balay } 462c2760754SSatish Balay } 463c2760754SSatish Balay } 464187ce0cbSSatish Balay #endif 465c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 466c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 467c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4680bdbc534SSatish Balay } 4690bdbc534SSatish Balay } else { 4700bdbc534SSatish Balay if (roworiented && !baij->donotstash) { 4710bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 4720bdbc534SSatish Balay } else { 4730bdbc534SSatish Balay if (!baij->donotstash) { 4740bdbc534SSatish Balay row = im[i]; 4750bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4760bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 4770bdbc534SSatish Balay } 4780bdbc534SSatish Balay } 4790bdbc534SSatish Balay } 4800bdbc534SSatish Balay } 4810bdbc534SSatish Balay } 482187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 483187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 484187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 485187ce0cbSSatish Balay #endif 4860bdbc534SSatish Balay PetscFunctionReturn(0); 4870bdbc534SSatish Balay } 4880bdbc534SSatish Balay 4890bdbc534SSatish Balay #undef __FUNC__ 4900bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4910bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4920bdbc534SSatish Balay { 4930bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4940bdbc534SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 4950bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 496b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 497c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 498c2760754SSatish Balay double tmp; 499187ce0cbSSatish Balay Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 5004a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 5014a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5024a15367fSSatish Balay #endif 5030bdbc534SSatish Balay 504d0a41580SSatish Balay PetscFunctionBegin; 505d0a41580SSatish Balay 5060bdbc534SSatish Balay if (roworiented) { 5070bdbc534SSatish Balay stepval = (n-1)*bs; 5080bdbc534SSatish Balay } else { 5090bdbc534SSatish Balay stepval = (m-1)*bs; 5100bdbc534SSatish Balay } 5110bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 5120bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 5130bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 5140bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 5150bdbc534SSatish Balay #endif 5160bdbc534SSatish Balay row = im[i]; 517187ce0cbSSatish Balay v_t = v + i*bs2; 518c2760754SSatish Balay if (row >= rstart && row < rend) { 5190bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5200bdbc534SSatish Balay col = in[j]; 5210bdbc534SSatish Balay 5220bdbc534SSatish Balay /* Look up into the Hash Table */ 523c2760754SSatish Balay key = row*Nbs+col+1; 524c2760754SSatish Balay h1 = HASH(size,key,tmp); 5250bdbc534SSatish Balay 526c2760754SSatish Balay idx = h1; 527187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 528187ce0cbSSatish Balay total_ct++; 529187ce0cbSSatish Balay insert_ct++; 530187ce0cbSSatish Balay if (HT[idx] != key) { 531187ce0cbSSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 532187ce0cbSSatish Balay if (idx == size) { 533187ce0cbSSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 534187ce0cbSSatish Balay if (idx == h1) { 535187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 536187ce0cbSSatish Balay } 537187ce0cbSSatish Balay } 538187ce0cbSSatish Balay } 539187ce0cbSSatish Balay #else 540c2760754SSatish Balay if (HT[idx] != key) { 541c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 542c2760754SSatish Balay if (idx == size) { 543c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 544c2760754SSatish Balay if (idx == h1) { 545c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 546c2760754SSatish Balay } 547c2760754SSatish Balay } 548c2760754SSatish Balay } 549187ce0cbSSatish Balay #endif 550c2760754SSatish Balay baij_a = HD[idx]; 5510bdbc534SSatish Balay if (roworiented) { 552c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 553187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 554187ce0cbSSatish Balay value = v_t; 555187ce0cbSSatish Balay v_t += bs; 556fef45726SSatish Balay if (addv == ADD_VALUES) { 557c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 558c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 559fef45726SSatish Balay baij_a[jj] += *value++; 560b4cc0f5aSSatish Balay } 561b4cc0f5aSSatish Balay } 562fef45726SSatish Balay } else { 563c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 564c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 565fef45726SSatish Balay baij_a[jj] = *value++; 566fef45726SSatish Balay } 567fef45726SSatish Balay } 568fef45726SSatish Balay } 5690bdbc534SSatish Balay } else { 5700bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 571fef45726SSatish Balay if (addv == ADD_VALUES) { 572b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 5730bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 574fef45726SSatish Balay baij_a[jj] += *value++; 575fef45726SSatish Balay } 576fef45726SSatish Balay } 577fef45726SSatish Balay } else { 578fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 579fef45726SSatish Balay for ( jj=0; jj<bs; jj++ ) { 580fef45726SSatish Balay baij_a[jj] = *value++; 581fef45726SSatish Balay } 582b4cc0f5aSSatish Balay } 5830bdbc534SSatish Balay } 5840bdbc534SSatish Balay } 5850bdbc534SSatish Balay } 5860bdbc534SSatish Balay } else { 5870bdbc534SSatish Balay if (!baij->donotstash) { 5880bdbc534SSatish Balay if (roworiented ) { 5890bdbc534SSatish Balay row = im[i]*bs; 5900bdbc534SSatish Balay value = v + i*(stepval+bs)*bs; 5910bdbc534SSatish Balay for ( j=0; j<bs; j++,row++ ) { 5920bdbc534SSatish Balay for ( k=0; k<n; k++ ) { 5930bdbc534SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 5940bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5950bdbc534SSatish Balay } 5960bdbc534SSatish Balay } 5970bdbc534SSatish Balay } 5980bdbc534SSatish Balay } else { 5990bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 6000bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 6010bdbc534SSatish Balay col = in[j]*bs; 6020bdbc534SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 6030bdbc534SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 6040bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 6050bdbc534SSatish Balay } 6060bdbc534SSatish Balay } 6070bdbc534SSatish Balay } 6080bdbc534SSatish Balay } 6090bdbc534SSatish Balay } 6100bdbc534SSatish Balay } 6110bdbc534SSatish Balay } 612187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 613187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 614187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 615187ce0cbSSatish Balay #endif 6160bdbc534SSatish Balay PetscFunctionReturn(0); 6170bdbc534SSatish Balay } 618133cdb44SSatish Balay 6190bdbc534SSatish Balay #undef __FUNC__ 6205615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 621ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 622d6de1c52SSatish Balay { 623d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 624d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 62548e59246SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 626d6de1c52SSatish Balay 627133cdb44SSatish Balay PetscFunctionBegin; 628d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 629a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 630a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 631d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 632d6de1c52SSatish Balay row = idxm[i] - bsrstart; 633d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 634a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 635a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 636d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 637d6de1c52SSatish Balay col = idxn[j] - bscstart; 63898dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 639d64ed03dSBarry Smith } else { 640905e6a2fSBarry Smith if (!baij->colmap) { 641905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 642905e6a2fSBarry Smith } 64348e59246SSatish Balay #if defined (USE_CTABLE) 644fa46199cSSatish Balay ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr); 645fa46199cSSatish Balay data --; 64648e59246SSatish Balay #else 64748e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 64848e59246SSatish Balay #endif 64948e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 650d9d09a02SSatish Balay else { 65148e59246SSatish Balay col = data + idxn[j]%bs; 65298dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 653d6de1c52SSatish Balay } 654d6de1c52SSatish Balay } 655d6de1c52SSatish Balay } 656d64ed03dSBarry Smith } else { 657a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 658d6de1c52SSatish Balay } 659d6de1c52SSatish Balay } 6603a40ed3dSBarry Smith PetscFunctionReturn(0); 661d6de1c52SSatish Balay } 662d6de1c52SSatish Balay 6635615d1e5SSatish Balay #undef __FUNC__ 6645615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 665ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 666d6de1c52SSatish Balay { 667d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 668d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 669acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 670d6de1c52SSatish Balay double sum = 0.0; 671d6de1c52SSatish Balay Scalar *v; 672d6de1c52SSatish Balay 673d64ed03dSBarry Smith PetscFunctionBegin; 674d6de1c52SSatish Balay if (baij->size == 1) { 675d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 676d6de1c52SSatish Balay } else { 677d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 678d6de1c52SSatish Balay v = amat->a; 679d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 6803a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 681e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 682d6de1c52SSatish Balay #else 683d6de1c52SSatish Balay sum += (*v)*(*v); v++; 684d6de1c52SSatish Balay #endif 685d6de1c52SSatish Balay } 686d6de1c52SSatish Balay v = bmat->a; 687d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 6883a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 689e20fef11SSatish Balay sum += PetscReal(PetscConj(*v)*(*v)); v++; 690d6de1c52SSatish Balay #else 691d6de1c52SSatish Balay sum += (*v)*(*v); v++; 692d6de1c52SSatish Balay #endif 693d6de1c52SSatish Balay } 694ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 695d6de1c52SSatish Balay *norm = sqrt(*norm); 696d64ed03dSBarry Smith } else { 697e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 698d6de1c52SSatish Balay } 699d64ed03dSBarry Smith } 7003a40ed3dSBarry Smith PetscFunctionReturn(0); 701d6de1c52SSatish Balay } 70257b952d6SSatish Balay 703bd7f49f5SSatish Balay 704fef45726SSatish Balay /* 705fef45726SSatish Balay Creates the hash table, and sets the table 706fef45726SSatish Balay This table is created only once. 707fef45726SSatish Balay If new entried need to be added to the matrix 708fef45726SSatish Balay then the hash table has to be destroyed and 709fef45726SSatish Balay recreated. 710fef45726SSatish Balay */ 711fef45726SSatish Balay #undef __FUNC__ 712fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 713d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 714596b8d2eSBarry Smith { 715596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 716596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 717596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 7180bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7194a15367fSSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart; 720187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 721fef45726SSatish Balay int *HT,key; 7220bdbc534SSatish Balay Scalar **HD; 723c2760754SSatish Balay double tmp; 7244a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g) 7254a15367fSSatish Balay int ct=0,max=0; 7264a15367fSSatish Balay #endif 727fef45726SSatish Balay 728d64ed03dSBarry Smith PetscFunctionBegin; 7290bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 7300bdbc534SSatish Balay size = baij->ht_size; 731fef45726SSatish Balay 7320bdbc534SSatish Balay if (baij->ht) { 7330bdbc534SSatish Balay PetscFunctionReturn(0); 734596b8d2eSBarry Smith } 7350bdbc534SSatish Balay 736fef45726SSatish Balay /* Allocate Memory for Hash Table */ 737b9e4cc15SSatish Balay baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 738b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 739b9e4cc15SSatish Balay HD = baij->hd; 740a07cd24cSSatish Balay HT = baij->ht; 741b9e4cc15SSatish Balay 742b9e4cc15SSatish Balay 743c2760754SSatish Balay PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 7440bdbc534SSatish Balay 745596b8d2eSBarry Smith 746596b8d2eSBarry Smith /* Loop Over A */ 7470bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 748596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 7490bdbc534SSatish Balay row = i+rstart; 7500bdbc534SSatish Balay col = aj[j]+cstart; 751596b8d2eSBarry Smith 752187ce0cbSSatish Balay key = row*Nbs + col + 1; 753c2760754SSatish Balay h1 = HASH(size,key,tmp); 7540bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7550bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7560bdbc534SSatish Balay HT[(h1+k)%size] = key; 7570bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 758596b8d2eSBarry Smith break; 759187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 760187ce0cbSSatish Balay } else { 761187ce0cbSSatish Balay ct++; 762187ce0cbSSatish Balay #endif 763596b8d2eSBarry Smith } 764187ce0cbSSatish Balay } 765187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 766187ce0cbSSatish Balay if (k> max) max = k; 767187ce0cbSSatish Balay #endif 768596b8d2eSBarry Smith } 769596b8d2eSBarry Smith } 770596b8d2eSBarry Smith /* Loop Over B */ 7710bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 772596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 7730bdbc534SSatish Balay row = i+rstart; 7740bdbc534SSatish Balay col = garray[bj[j]]; 775187ce0cbSSatish Balay key = row*Nbs + col + 1; 776c2760754SSatish Balay h1 = HASH(size,key,tmp); 7770bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7780bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7790bdbc534SSatish Balay HT[(h1+k)%size] = key; 7800bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 781596b8d2eSBarry Smith break; 782187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 783187ce0cbSSatish Balay } else { 784187ce0cbSSatish Balay ct++; 785187ce0cbSSatish Balay #endif 786596b8d2eSBarry Smith } 787187ce0cbSSatish Balay } 788187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 789187ce0cbSSatish Balay if (k> max) max = k; 790187ce0cbSSatish Balay #endif 791596b8d2eSBarry Smith } 792596b8d2eSBarry Smith } 793596b8d2eSBarry Smith 794596b8d2eSBarry Smith /* Print Summary */ 795187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 796c2760754SSatish Balay for ( i=0,j=0; i<size; i++) 797596b8d2eSBarry Smith if (HT[i]) {j++;} 798187ce0cbSSatish Balay PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 799187ce0cbSSatish Balay (j== 0)? 0.0:((double)(ct+j))/j,max); 800187ce0cbSSatish Balay #endif 8013a40ed3dSBarry Smith PetscFunctionReturn(0); 802596b8d2eSBarry Smith } 80357b952d6SSatish Balay 804*bbb85fb3SSatish Balay #if defined (__JUNK__) 805*bbb85fb3SSatish Balay #undef __FUNC__ 806*bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 807*bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 808*bbb85fb3SSatish Balay { 809*bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 810*bbb85fb3SSatish Balay MPI_Comm comm = mat->comm; 811*bbb85fb3SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 812*bbb85fb3SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 813*bbb85fb3SSatish Balay MPI_Request *send_waits,*recv_waits; 814*bbb85fb3SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 815*bbb85fb3SSatish Balay InsertMode addv; 816*bbb85fb3SSatish Balay Scalar *rvalues,*svalues; 817*bbb85fb3SSatish Balay 818*bbb85fb3SSatish Balay PetscFunctionBegin; 819*bbb85fb3SSatish Balay if (baij->donotstash) { 820*bbb85fb3SSatish Balay baij->svalues = 0; baij->rvalues = 0; 821*bbb85fb3SSatish Balay baij->nsends = 0; baij->nrecvs = 0; 822*bbb85fb3SSatish Balay baij->send_waits = 0; baij->recv_waits = 0; 823*bbb85fb3SSatish Balay baij->rmax = 0; 824*bbb85fb3SSatish Balay PetscFunctionReturn(0); 825*bbb85fb3SSatish Balay } 826*bbb85fb3SSatish Balay 827*bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 828*bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 829*bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 830*bbb85fb3SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 831*bbb85fb3SSatish Balay } 832*bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 833*bbb85fb3SSatish Balay 834*bbb85fb3SSatish Balay /* first count number of contributors to each processor */ 835*bbb85fb3SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 836*bbb85fb3SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 837*bbb85fb3SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 838*bbb85fb3SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 839*bbb85fb3SSatish Balay idx = baij->stash.idx[i]; 840*bbb85fb3SSatish Balay for ( j=0; j<size; j++ ) { 841*bbb85fb3SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 842*bbb85fb3SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 843*bbb85fb3SSatish Balay } 844*bbb85fb3SSatish Balay } 845*bbb85fb3SSatish Balay } 846*bbb85fb3SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 847*bbb85fb3SSatish Balay 848*bbb85fb3SSatish Balay /* inform other processors of number of messages and max length*/ 849*bbb85fb3SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 850*bbb85fb3SSatish Balay ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 851*bbb85fb3SSatish Balay nreceives = work[rank]; 852*bbb85fb3SSatish Balay ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 853*bbb85fb3SSatish Balay nmax = work[rank]; 854*bbb85fb3SSatish Balay PetscFree(work); 855*bbb85fb3SSatish Balay 856*bbb85fb3SSatish Balay /* post receives: 857*bbb85fb3SSatish Balay 1) each message will consist of ordered pairs 858*bbb85fb3SSatish Balay (global index,value) we store the global index as a double 859*bbb85fb3SSatish Balay to simplify the message passing. 860*bbb85fb3SSatish Balay 2) since we don't know how long each individual message is we 861*bbb85fb3SSatish Balay allocate the largest needed buffer for each receive. Potentially 862*bbb85fb3SSatish Balay this is a lot of wasted space. 863*bbb85fb3SSatish Balay 864*bbb85fb3SSatish Balay 865*bbb85fb3SSatish Balay This could be done better. 866*bbb85fb3SSatish Balay */ 867*bbb85fb3SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 868*bbb85fb3SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 869*bbb85fb3SSatish Balay for ( i=0; i<nreceives; i++ ) { 870*bbb85fb3SSatish Balay ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 871*bbb85fb3SSatish Balay } 872*bbb85fb3SSatish Balay 873*bbb85fb3SSatish Balay /* do sends: 874*bbb85fb3SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 875*bbb85fb3SSatish Balay the ith processor 876*bbb85fb3SSatish Balay */ 877*bbb85fb3SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 878*bbb85fb3SSatish Balay send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 879*bbb85fb3SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 880*bbb85fb3SSatish Balay starts[0] = 0; 881*bbb85fb3SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 882*bbb85fb3SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 883*bbb85fb3SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 884*bbb85fb3SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 885*bbb85fb3SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 886*bbb85fb3SSatish Balay } 887*bbb85fb3SSatish Balay PetscFree(owner); 888*bbb85fb3SSatish Balay starts[0] = 0; 889*bbb85fb3SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 890*bbb85fb3SSatish Balay count = 0; 891*bbb85fb3SSatish Balay for ( i=0; i<size; i++ ) { 892*bbb85fb3SSatish Balay if (procs[i]) { 893*bbb85fb3SSatish Balay ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 894*bbb85fb3SSatish Balay } 895*bbb85fb3SSatish Balay } 896*bbb85fb3SSatish Balay PetscFree(starts); PetscFree(nprocs); 897*bbb85fb3SSatish Balay 898*bbb85fb3SSatish Balay /* Free cache space */ 899*bbb85fb3SSatish Balay PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 900*bbb85fb3SSatish Balay ierr = StashReset_Private(&baij->stash); CHKERRQ(ierr); 901*bbb85fb3SSatish Balay 902*bbb85fb3SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 903*bbb85fb3SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 904*bbb85fb3SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 905*bbb85fb3SSatish Balay baij->rmax = nmax; 906*bbb85fb3SSatish Balay 907*bbb85fb3SSatish Balay PetscFunctionReturn(0); 908*bbb85fb3SSatish Balay } 9094a69d603SSatish Balay 9105615d1e5SSatish Balay #undef __FUNC__ 9115615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 912ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 91357b952d6SSatish Balay { 91457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 91557b952d6SSatish Balay MPI_Status *send_status,recv_status; 916*bbb85fb3SSatish Balay int imdex,count = nrecvs, i, n, ierr; 917b7029e64SSatish Balay int bs=baij->bs,row,col,other_disassembled; 918*bbb85fb3SSatish Balay Scalar val; 919e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 92057b952d6SSatish Balay 921d64ed03dSBarry Smith PetscFunctionBegin; 92257b952d6SSatish Balay /* wait on receives */ 92357b952d6SSatish Balay while (count) { 924ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 92557b952d6SSatish Balay /* unpack receives into our local space */ 92657b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 927ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 92857b952d6SSatish Balay n = n/3; 92957b952d6SSatish Balay for ( i=0; i<n; i++ ) { 93057b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 93157b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 93257b952d6SSatish Balay val = values[3*i+2]; 93357b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 93457b952d6SSatish Balay col -= baij->cstart*bs; 9354a69d603SSatish Balay ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 936d64ed03dSBarry Smith } else { 93757b952d6SSatish Balay if (mat->was_assembled) { 938905e6a2fSBarry Smith if (!baij->colmap) { 939905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 940905e6a2fSBarry Smith } 94148e59246SSatish Balay #if defined (USE_CTABLE) 942fa46199cSSatish Balay ierr = TableFind(baij->colmap,col/bs+1,&col); CHKERRQ(ierr); 943fa46199cSSatish Balay col = col - 1 + col%bs; 94448e59246SSatish Balay #else 945a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 94648e59246SSatish Balay #endif 94757b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 94857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 94957b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 95057b952d6SSatish Balay } 95157b952d6SSatish Balay } 9524a69d603SSatish Balay ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 95357b952d6SSatish Balay } 95457b952d6SSatish Balay } 95557b952d6SSatish Balay count--; 95657b952d6SSatish Balay } 957570da906SBarry Smith if (baij->recv_waits) PetscFree(baij->recv_waits); 958570da906SBarry Smith if (baij->rvalues) PetscFree(baij->rvalues); 95957b952d6SSatish Balay 96057b952d6SSatish Balay /* wait on sends */ 96157b952d6SSatish Balay if (baij->nsends) { 962d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 963ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 96457b952d6SSatish Balay PetscFree(send_status); 96557b952d6SSatish Balay } 966570da906SBarry Smith if (baij->send_waits) PetscFree(baij->send_waits); 967570da906SBarry Smith if (baij->svalues) PetscFree(baij->svalues); 96857b952d6SSatish Balay 96957b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 97057b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 97157b952d6SSatish Balay 97257b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 97357b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 9746e713f22SBarry Smith /* 9756e713f22SBarry Smith if nonzero structure of submatrix B cannot change then we know that 9766e713f22SBarry Smith no processor disassembled thus we can skip this stuff 9776e713f22SBarry Smith */ 9786e713f22SBarry Smith if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 979ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 98057b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 98157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 98257b952d6SSatish Balay } 9836e713f22SBarry Smith } 98457b952d6SSatish Balay 9856d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 98657b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 98757b952d6SSatish Balay } 98857b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 98957b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 99057b952d6SSatish Balay 991187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g) 992187ce0cbSSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 993187ce0cbSSatish Balay PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 994187ce0cbSSatish Balay ((double)baij->ht_total_ct)/baij->ht_insert_ct); 995187ce0cbSSatish Balay baij->ht_total_ct = 0; 996187ce0cbSSatish Balay baij->ht_insert_ct = 0; 997187ce0cbSSatish Balay } 998187ce0cbSSatish Balay #endif 999133cdb44SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1000133cdb44SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 1001f830108cSBarry Smith mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1002f830108cSBarry Smith mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1003bd7f49f5SSatish Balay } 1004187ce0cbSSatish Balay 100557b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 10063a40ed3dSBarry Smith PetscFunctionReturn(0); 100757b952d6SSatish Balay } 1008*bbb85fb3SSatish Balay #endif 1009*bbb85fb3SSatish Balay #undef __FUNC__ 1010*bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 1011*bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 1012*bbb85fb3SSatish Balay { 1013*bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1014*bbb85fb3SSatish Balay int ierr; 1015*bbb85fb3SSatish Balay InsertMode addv; 1016*bbb85fb3SSatish Balay 1017*bbb85fb3SSatish Balay PetscFunctionBegin; 1018*bbb85fb3SSatish Balay if (baij->donotstash) { 1019*bbb85fb3SSatish Balay PetscFunctionReturn(0); 1020*bbb85fb3SSatish Balay } 1021*bbb85fb3SSatish Balay 1022*bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 1023*bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 1024*bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 1025*bbb85fb3SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 1026*bbb85fb3SSatish Balay } 1027*bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 1028*bbb85fb3SSatish Balay 1029*bbb85fb3SSatish Balay ierr = StashScatterBegin_Private(&baij->stash,baij->rowners); CHKERRQ(ierr); 1030*bbb85fb3SSatish Balay 1031*bbb85fb3SSatish Balay /* Free cache space */ 1032*bbb85fb3SSatish Balay PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 1033*bbb85fb3SSatish Balay PetscFunctionReturn(0); 1034*bbb85fb3SSatish Balay } 1035*bbb85fb3SSatish Balay 1036*bbb85fb3SSatish Balay #undef __FUNC__ 1037*bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 1038*bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 1039*bbb85fb3SSatish Balay { 1040*bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1041*bbb85fb3SSatish Balay int imdex,i,j,rstart,ncols,n,ierr; 1042*bbb85fb3SSatish Balay int *row,*col,other_disassembled; 1043*bbb85fb3SSatish Balay Scalar *val; 1044*bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 1045*bbb85fb3SSatish Balay 1046*bbb85fb3SSatish Balay PetscFunctionBegin; 1047*bbb85fb3SSatish Balay if (!baij->donotstash) { 1048*bbb85fb3SSatish Balay ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr); 1049*bbb85fb3SSatish Balay for (imdex=0; imdex<baij->stash.nrecvs; imdex++ ) { 1050*bbb85fb3SSatish Balay n = baij->stash.rdata[imdex].n; 1051*bbb85fb3SSatish Balay row = baij->stash.rdata[imdex].i; 1052*bbb85fb3SSatish Balay col = baij->stash.rdata[imdex].j; 1053*bbb85fb3SSatish Balay val = baij->stash.rdata[imdex].a; 1054*bbb85fb3SSatish Balay for ( i=0; i<n; ) { 1055*bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1056*bbb85fb3SSatish Balay for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 1057*bbb85fb3SSatish Balay if (j < n) ncols = j-i; 1058*bbb85fb3SSatish Balay else ncols = n-i; 1059*bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 1060*bbb85fb3SSatish Balay ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 1061*bbb85fb3SSatish Balay i = j; 1062*bbb85fb3SSatish Balay } 1063*bbb85fb3SSatish Balay } 1064*bbb85fb3SSatish Balay ierr = StashReset_Private(&baij->stash); CHKERRQ(ierr); 1065*bbb85fb3SSatish Balay } 1066*bbb85fb3SSatish Balay 1067*bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 1068*bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 1069*bbb85fb3SSatish Balay 1070*bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1071*bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1072*bbb85fb3SSatish Balay /* 1073*bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1074*bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1075*bbb85fb3SSatish Balay */ 1076*bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 1077*bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1078*bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1079*bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 1080*bbb85fb3SSatish Balay } 1081*bbb85fb3SSatish Balay } 1082*bbb85fb3SSatish Balay 1083*bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1084*bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 1085*bbb85fb3SSatish Balay } 1086*bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 1087*bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 1088*bbb85fb3SSatish Balay 1089*bbb85fb3SSatish Balay #if defined(USE_PETSC_BOPT_g) 1090*bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1091*bbb85fb3SSatish Balay PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 1092*bbb85fb3SSatish Balay ((double)baij->ht_total_ct)/baij->ht_insert_ct); 1093*bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1094*bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1095*bbb85fb3SSatish Balay } 1096*bbb85fb3SSatish Balay #endif 1097*bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1098*bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 1099*bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1100*bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1101*bbb85fb3SSatish Balay } 1102*bbb85fb3SSatish Balay 1103*bbb85fb3SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 1104*bbb85fb3SSatish Balay PetscFunctionReturn(0); 1105*bbb85fb3SSatish Balay } 110657b952d6SSatish Balay 11075615d1e5SSatish Balay #undef __FUNC__ 11085615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 110957b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 111057b952d6SSatish Balay { 111157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 111257b952d6SSatish Balay int ierr; 111357b952d6SSatish Balay 1114d64ed03dSBarry Smith PetscFunctionBegin; 111557b952d6SSatish Balay if (baij->size == 1) { 111657b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1117a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 11183a40ed3dSBarry Smith PetscFunctionReturn(0); 111957b952d6SSatish Balay } 112057b952d6SSatish Balay 11215615d1e5SSatish Balay #undef __FUNC__ 11227b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 11237b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 112457b952d6SSatish Balay { 112557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 112677ed5343SBarry Smith int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 112757b952d6SSatish Balay FILE *fd; 112857b952d6SSatish Balay ViewerType vtype; 112957b952d6SSatish Balay 1130d64ed03dSBarry Smith PetscFunctionBegin; 113157b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 11323f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 1133d41123aaSBarry Smith ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr); 1134639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 11354e220ebcSLois Curfman McInnes MatInfo info; 113657b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 113757b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1138d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 113957b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 114057b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 11414e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 11424e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 11434e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 11444e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 11454e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 11464e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 114757b952d6SSatish Balay fflush(fd); 114857b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 114957b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 11503a40ed3dSBarry Smith PetscFunctionReturn(0); 1151d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1152bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 11533a40ed3dSBarry Smith PetscFunctionReturn(0); 115457b952d6SSatish Balay } 115557b952d6SSatish Balay } 115657b952d6SSatish Balay 11573f1db9ecSBarry Smith if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 115857b952d6SSatish Balay Draw draw; 115957b952d6SSatish Balay PetscTruth isnull; 116077ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 11613a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 116257b952d6SSatish Balay } 116357b952d6SSatish Balay 116457b952d6SSatish Balay if (size == 1) { 116557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1166d64ed03dSBarry Smith } else { 116757b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 116857b952d6SSatish Balay Mat A; 116957b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 117040011551SBarry Smith int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 117157b952d6SSatish Balay int mbs = baij->mbs; 117257b952d6SSatish Balay Scalar *a; 117357b952d6SSatish Balay 117457b952d6SSatish Balay if (!rank) { 117555843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1176d64ed03dSBarry Smith } else { 117755843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 117857b952d6SSatish Balay } 117957b952d6SSatish Balay PLogObjectParent(mat,A); 118057b952d6SSatish Balay 118157b952d6SSatish Balay /* copy over the A part */ 118257b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 118357b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 118457b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 118557b952d6SSatish Balay 118657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 118757b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 118857b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 118957b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 119057b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 119157b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1192cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1193cee3aa6bSSatish Balay col++; a += bs; 119457b952d6SSatish Balay } 119557b952d6SSatish Balay } 119657b952d6SSatish Balay } 119757b952d6SSatish Balay /* copy over the B part */ 119857b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 119957b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 120057b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 120157b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 120257b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 120357b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 120457b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 120557b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1206cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1207cee3aa6bSSatish Balay col++; a += bs; 120857b952d6SSatish Balay } 120957b952d6SSatish Balay } 121057b952d6SSatish Balay } 121157b952d6SSatish Balay PetscFree(rvals); 12126d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12136d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 121455843e3eSBarry Smith /* 121555843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 121655843e3eSBarry Smith synchronized across all processors that share the Draw object 121755843e3eSBarry Smith */ 12183f1db9ecSBarry Smith if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 121957b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 122057b952d6SSatish Balay } 122157b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 122257b952d6SSatish Balay } 12233a40ed3dSBarry Smith PetscFunctionReturn(0); 122457b952d6SSatish Balay } 122557b952d6SSatish Balay 122657b952d6SSatish Balay 122757b952d6SSatish Balay 12285615d1e5SSatish Balay #undef __FUNC__ 12295615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 1230e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 123157b952d6SSatish Balay { 123257b952d6SSatish Balay int ierr; 123357b952d6SSatish Balay ViewerType vtype; 123457b952d6SSatish Balay 1235d64ed03dSBarry Smith PetscFunctionBegin; 123657b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 12373f1db9ecSBarry Smith if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 12387b2a1423SBarry Smith PetscTypeCompare(vtype,SOCKET_VIEWER)) { 12397b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 12403f1db9ecSBarry Smith } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 12413a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 12425cd90555SBarry Smith } else { 12435cd90555SBarry Smith SETERRQ(1,1,"Viewer type not supported by PETSc object"); 124457b952d6SSatish Balay } 12453a40ed3dSBarry Smith PetscFunctionReturn(0); 124657b952d6SSatish Balay } 124757b952d6SSatish Balay 12485615d1e5SSatish Balay #undef __FUNC__ 12495615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1250e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 125179bdfe76SSatish Balay { 125279bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 125379bdfe76SSatish Balay int ierr; 125479bdfe76SSatish Balay 1255d64ed03dSBarry Smith PetscFunctionBegin; 125698dd23e9SBarry Smith if (--mat->refct > 0) PetscFunctionReturn(0); 125798dd23e9SBarry Smith 125898dd23e9SBarry Smith if (mat->mapping) { 125998dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 126098dd23e9SBarry Smith } 126198dd23e9SBarry Smith if (mat->bmapping) { 126298dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 126398dd23e9SBarry Smith } 126461b13de0SBarry Smith if (mat->rmap) { 126561b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 126661b13de0SBarry Smith } 126761b13de0SBarry Smith if (mat->cmap) { 126861b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 126961b13de0SBarry Smith } 12703a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 1271e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 127279bdfe76SSatish Balay #endif 127379bdfe76SSatish Balay 127483e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 127579bdfe76SSatish Balay PetscFree(baij->rowners); 127679bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 127779bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 127848e59246SSatish Balay #if defined (USE_CTABLE) 127948e59246SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 128048e59246SSatish Balay #else 128179bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 128248e59246SSatish Balay #endif 128379bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 128479bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 128579bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 128679bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 128730793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 1288b9e4cc15SSatish Balay if (baij->hd) PetscFree(baij->hd); 128979bdfe76SSatish Balay PetscFree(baij); 129079bdfe76SSatish Balay PLogObjectDestroy(mat); 129179bdfe76SSatish Balay PetscHeaderDestroy(mat); 12923a40ed3dSBarry Smith PetscFunctionReturn(0); 129379bdfe76SSatish Balay } 129479bdfe76SSatish Balay 12955615d1e5SSatish Balay #undef __FUNC__ 12965615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1297ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1298cee3aa6bSSatish Balay { 1299cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 130047b4a8eaSLois Curfman McInnes int ierr, nt; 1301cee3aa6bSSatish Balay 1302d64ed03dSBarry Smith PetscFunctionBegin; 1303e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 130447b4a8eaSLois Curfman McInnes if (nt != a->n) { 1305a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 130647b4a8eaSLois Curfman McInnes } 1307e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 130847b4a8eaSLois Curfman McInnes if (nt != a->m) { 1309a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 131047b4a8eaSLois Curfman McInnes } 131143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1312f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 131343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1314f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 131543a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 13163a40ed3dSBarry Smith PetscFunctionReturn(0); 1317cee3aa6bSSatish Balay } 1318cee3aa6bSSatish Balay 13195615d1e5SSatish Balay #undef __FUNC__ 13205615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1321ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1322cee3aa6bSSatish Balay { 1323cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1324cee3aa6bSSatish Balay int ierr; 1325d64ed03dSBarry Smith 1326d64ed03dSBarry Smith PetscFunctionBegin; 132743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1328f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 132943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1330f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 13313a40ed3dSBarry Smith PetscFunctionReturn(0); 1332cee3aa6bSSatish Balay } 1333cee3aa6bSSatish Balay 13345615d1e5SSatish Balay #undef __FUNC__ 13355615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1336ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1337cee3aa6bSSatish Balay { 1338cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1339cee3aa6bSSatish Balay int ierr; 1340cee3aa6bSSatish Balay 1341d64ed03dSBarry Smith PetscFunctionBegin; 1342cee3aa6bSSatish Balay /* do nondiagonal part */ 1343f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1344cee3aa6bSSatish Balay /* send it on its way */ 1345537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1346cee3aa6bSSatish Balay /* do local part */ 1347f830108cSBarry Smith ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1348cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1349cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1350cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1351639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13523a40ed3dSBarry Smith PetscFunctionReturn(0); 1353cee3aa6bSSatish Balay } 1354cee3aa6bSSatish Balay 13555615d1e5SSatish Balay #undef __FUNC__ 13565615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1357ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1358cee3aa6bSSatish Balay { 1359cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1360cee3aa6bSSatish Balay int ierr; 1361cee3aa6bSSatish Balay 1362d64ed03dSBarry Smith PetscFunctionBegin; 1363cee3aa6bSSatish Balay /* do nondiagonal part */ 1364f830108cSBarry Smith ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1365cee3aa6bSSatish Balay /* send it on its way */ 1366537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1367cee3aa6bSSatish Balay /* do local part */ 1368f830108cSBarry Smith ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1369cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1370cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1371cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1372537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 13733a40ed3dSBarry Smith PetscFunctionReturn(0); 1374cee3aa6bSSatish Balay } 1375cee3aa6bSSatish Balay 1376cee3aa6bSSatish Balay /* 1377cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1378cee3aa6bSSatish Balay diagonal block 1379cee3aa6bSSatish Balay */ 13805615d1e5SSatish Balay #undef __FUNC__ 13815615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1382ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1383cee3aa6bSSatish Balay { 1384cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 13853a40ed3dSBarry Smith int ierr; 1386d64ed03dSBarry Smith 1387d64ed03dSBarry Smith PetscFunctionBegin; 1388a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 13893a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13903a40ed3dSBarry Smith PetscFunctionReturn(0); 1391cee3aa6bSSatish Balay } 1392cee3aa6bSSatish Balay 13935615d1e5SSatish Balay #undef __FUNC__ 13945615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1395ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1396cee3aa6bSSatish Balay { 1397cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1398cee3aa6bSSatish Balay int ierr; 1399d64ed03dSBarry Smith 1400d64ed03dSBarry Smith PetscFunctionBegin; 1401cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1402cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 14033a40ed3dSBarry Smith PetscFunctionReturn(0); 1404cee3aa6bSSatish Balay } 1405026e39d0SSatish Balay 14065615d1e5SSatish Balay #undef __FUNC__ 14075615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1408ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 140957b952d6SSatish Balay { 141057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1411d64ed03dSBarry Smith 1412d64ed03dSBarry Smith PetscFunctionBegin; 1413bd7f49f5SSatish Balay if (m) *m = mat->M; 1414bd7f49f5SSatish Balay if (n) *n = mat->N; 14153a40ed3dSBarry Smith PetscFunctionReturn(0); 141657b952d6SSatish Balay } 141757b952d6SSatish Balay 14185615d1e5SSatish Balay #undef __FUNC__ 14195615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1420ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 142157b952d6SSatish Balay { 142257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1423d64ed03dSBarry Smith 1424d64ed03dSBarry Smith PetscFunctionBegin; 1425f830108cSBarry Smith *m = mat->m; *n = mat->n; 14263a40ed3dSBarry Smith PetscFunctionReturn(0); 142757b952d6SSatish Balay } 142857b952d6SSatish Balay 14295615d1e5SSatish Balay #undef __FUNC__ 14305615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1431ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 143257b952d6SSatish Balay { 143357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1434d64ed03dSBarry Smith 1435d64ed03dSBarry Smith PetscFunctionBegin; 143657b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 14373a40ed3dSBarry Smith PetscFunctionReturn(0); 143857b952d6SSatish Balay } 143957b952d6SSatish Balay 14405615d1e5SSatish Balay #undef __FUNC__ 14415615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1442acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1443acdf5bf4SSatish Balay { 1444acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1445acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1446acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1447d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1448d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1449acdf5bf4SSatish Balay 1450d64ed03dSBarry Smith PetscFunctionBegin; 1451a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1452acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1453acdf5bf4SSatish Balay 1454acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1455acdf5bf4SSatish Balay /* 1456acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1457acdf5bf4SSatish Balay */ 1458acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1459bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1460bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1461acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1462acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1463acdf5bf4SSatish Balay } 1464acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1465acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1466acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1467acdf5bf4SSatish Balay } 1468acdf5bf4SSatish Balay 1469a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1470d9d09a02SSatish Balay lrow = row - brstart; 1471acdf5bf4SSatish Balay 1472acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1473acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1474acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1475f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1476f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1477acdf5bf4SSatish Balay nztot = nzA + nzB; 1478acdf5bf4SSatish Balay 1479acdf5bf4SSatish Balay cmap = mat->garray; 1480acdf5bf4SSatish Balay if (v || idx) { 1481acdf5bf4SSatish Balay if (nztot) { 1482acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1483acdf5bf4SSatish Balay int imark = -1; 1484acdf5bf4SSatish Balay if (v) { 1485acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1486acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1487d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1488acdf5bf4SSatish Balay else break; 1489acdf5bf4SSatish Balay } 1490acdf5bf4SSatish Balay imark = i; 1491acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1492acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1493acdf5bf4SSatish Balay } 1494acdf5bf4SSatish Balay if (idx) { 1495acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1496acdf5bf4SSatish Balay if (imark > -1) { 1497acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1498bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1499acdf5bf4SSatish Balay } 1500acdf5bf4SSatish Balay } else { 1501acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1502d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1503d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1504acdf5bf4SSatish Balay else break; 1505acdf5bf4SSatish Balay } 1506acdf5bf4SSatish Balay imark = i; 1507acdf5bf4SSatish Balay } 1508d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1509d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1510acdf5bf4SSatish Balay } 1511d64ed03dSBarry Smith } else { 1512d212a18eSSatish Balay if (idx) *idx = 0; 1513d212a18eSSatish Balay if (v) *v = 0; 1514d212a18eSSatish Balay } 1515acdf5bf4SSatish Balay } 1516acdf5bf4SSatish Balay *nz = nztot; 1517f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1518f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 15193a40ed3dSBarry Smith PetscFunctionReturn(0); 1520acdf5bf4SSatish Balay } 1521acdf5bf4SSatish Balay 15225615d1e5SSatish Balay #undef __FUNC__ 15235615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1524acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1525acdf5bf4SSatish Balay { 1526acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1527d64ed03dSBarry Smith 1528d64ed03dSBarry Smith PetscFunctionBegin; 1529acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1530a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1531acdf5bf4SSatish Balay } 1532acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 15333a40ed3dSBarry Smith PetscFunctionReturn(0); 1534acdf5bf4SSatish Balay } 1535acdf5bf4SSatish Balay 15365615d1e5SSatish Balay #undef __FUNC__ 15375615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1538ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 15395a838052SSatish Balay { 15405a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1541d64ed03dSBarry Smith 1542d64ed03dSBarry Smith PetscFunctionBegin; 15435a838052SSatish Balay *bs = baij->bs; 15443a40ed3dSBarry Smith PetscFunctionReturn(0); 15455a838052SSatish Balay } 15465a838052SSatish Balay 15475615d1e5SSatish Balay #undef __FUNC__ 15485615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1549ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 155058667388SSatish Balay { 155158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 155258667388SSatish Balay int ierr; 1553d64ed03dSBarry Smith 1554d64ed03dSBarry Smith PetscFunctionBegin; 155558667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 155658667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 15573a40ed3dSBarry Smith PetscFunctionReturn(0); 155858667388SSatish Balay } 15590ac07820SSatish Balay 15605615d1e5SSatish Balay #undef __FUNC__ 15615615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1562ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 15630ac07820SSatish Balay { 15644e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 15654e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 15667d57db60SLois Curfman McInnes int ierr; 15677d57db60SLois Curfman McInnes double isend[5], irecv[5]; 15680ac07820SSatish Balay 1569d64ed03dSBarry Smith PetscFunctionBegin; 15704e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 15714e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 15720e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1573de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 15744e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 15750e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1576de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 15770ac07820SSatish Balay if (flag == MAT_LOCAL) { 15784e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15794e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 15804e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15814e220ebcSLois Curfman McInnes info->memory = isend[3]; 15824e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15830ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1584f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 15854e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15864e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15874e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15884e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15894e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15900ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1591f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 15924e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15934e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15944e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15954e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15964e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1597d41123aaSBarry Smith } else { 1598d41123aaSBarry Smith SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 15990ac07820SSatish Balay } 16005a5d4f66SBarry Smith info->rows_global = (double)a->M; 16015a5d4f66SBarry Smith info->columns_global = (double)a->N; 16025a5d4f66SBarry Smith info->rows_local = (double)a->m; 16035a5d4f66SBarry Smith info->columns_local = (double)a->N; 16044e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 16054e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16064e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16073a40ed3dSBarry Smith PetscFunctionReturn(0); 16080ac07820SSatish Balay } 16090ac07820SSatish Balay 16105615d1e5SSatish Balay #undef __FUNC__ 16115615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1612ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 161358667388SSatish Balay { 161458667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 161598305bb5SBarry Smith int ierr; 161658667388SSatish Balay 1617d64ed03dSBarry Smith PetscFunctionBegin; 161858667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 161958667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 16206da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1621c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 16224787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 16234787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 162498305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 162598305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1626b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1627aeafbbfcSLois Curfman McInnes a->roworiented = 1; 162898305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 162998305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1630b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 16316da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 163258667388SSatish Balay op == MAT_SYMMETRIC || 163358667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1634b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 163598305bb5SBarry Smith op == MAT_USE_HASH_TABLE) { 163658667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 163798305bb5SBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 163858667388SSatish Balay a->roworiented = 0; 163998305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 164098305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 16412b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 164290f02eecSBarry Smith a->donotstash = 1; 1643d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1644d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1645133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 1646133cdb44SSatish Balay a->ht_flag = 1; 1647d64ed03dSBarry Smith } else { 1648d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1649d64ed03dSBarry Smith } 16503a40ed3dSBarry Smith PetscFunctionReturn(0); 165158667388SSatish Balay } 165258667388SSatish Balay 16535615d1e5SSatish Balay #undef __FUNC__ 16545615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1655ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 16560ac07820SSatish Balay { 16570ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 16580ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 16590ac07820SSatish Balay Mat B; 166040011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 16610ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 16620ac07820SSatish Balay Scalar *a; 16630ac07820SSatish Balay 1664d64ed03dSBarry Smith PetscFunctionBegin; 1665a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 16660ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 16670ac07820SSatish Balay CHKERRQ(ierr); 16680ac07820SSatish Balay 16690ac07820SSatish Balay /* copy over the A part */ 16700ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 16710ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16720ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 16730ac07820SSatish Balay 16740ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 16750ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16760ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 16770ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 16780ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 16790ac07820SSatish Balay for (k=0; k<bs; k++ ) { 16800ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16810ac07820SSatish Balay col++; a += bs; 16820ac07820SSatish Balay } 16830ac07820SSatish Balay } 16840ac07820SSatish Balay } 16850ac07820SSatish Balay /* copy over the B part */ 16860ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 16870ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16880ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 16890ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16900ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 16910ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 16920ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16930ac07820SSatish Balay for (k=0; k<bs; k++ ) { 16940ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16950ac07820SSatish Balay col++; a += bs; 16960ac07820SSatish Balay } 16970ac07820SSatish Balay } 16980ac07820SSatish Balay } 16990ac07820SSatish Balay PetscFree(rvals); 17000ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 17010ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 17020ac07820SSatish Balay 17030ac07820SSatish Balay if (matout != PETSC_NULL) { 17040ac07820SSatish Balay *matout = B; 17050ac07820SSatish Balay } else { 1706f830108cSBarry Smith PetscOps *Abops; 1707cc2dc46cSBarry Smith MatOps Aops; 1708f830108cSBarry Smith 17090ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 17100ac07820SSatish Balay PetscFree(baij->rowners); 17110ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 17120ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1713b1fc9764SSatish Balay #if defined (USE_CTABLE) 1714b1fc9764SSatish Balay if (baij->colmap) TableDelete(baij->colmap); 1715b1fc9764SSatish Balay #else 17160ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 1717b1fc9764SSatish Balay #endif 17180ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 17190ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 17200ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 17210ac07820SSatish Balay PetscFree(baij); 1722f830108cSBarry Smith 1723f830108cSBarry Smith /* 1724f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1725f830108cSBarry Smith A pointers for the bops and ops but copy everything 1726f830108cSBarry Smith else from C. 1727f830108cSBarry Smith */ 1728f830108cSBarry Smith Abops = A->bops; 1729f830108cSBarry Smith Aops = A->ops; 1730f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1731f830108cSBarry Smith A->bops = Abops; 1732f830108cSBarry Smith A->ops = Aops; 1733f830108cSBarry Smith 17340ac07820SSatish Balay PetscHeaderDestroy(B); 17350ac07820SSatish Balay } 17363a40ed3dSBarry Smith PetscFunctionReturn(0); 17370ac07820SSatish Balay } 17380e95ebc0SSatish Balay 17395615d1e5SSatish Balay #undef __FUNC__ 17405615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 17410e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 17420e95ebc0SSatish Balay { 17430e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 17440e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 17450e95ebc0SSatish Balay int ierr,s1,s2,s3; 17460e95ebc0SSatish Balay 1747d64ed03dSBarry Smith PetscFunctionBegin; 17480e95ebc0SSatish Balay if (ll) { 17490e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 17500e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1751a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 17520e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 17530e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 17540e95ebc0SSatish Balay } 1755a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 17563a40ed3dSBarry Smith PetscFunctionReturn(0); 17570e95ebc0SSatish Balay } 17580e95ebc0SSatish Balay 17595615d1e5SSatish Balay #undef __FUNC__ 17605615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1761ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 17620ac07820SSatish Balay { 17630ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 17640ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1765a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 17660ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 17670ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1768a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 17690ac07820SSatish Balay MPI_Comm comm = A->comm; 17700ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17710ac07820SSatish Balay MPI_Status recv_status,*send_status; 17720ac07820SSatish Balay IS istmp; 17730ac07820SSatish Balay 1774d64ed03dSBarry Smith PetscFunctionBegin; 17750ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 17760ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 17770ac07820SSatish Balay 17780ac07820SSatish Balay /* first count number of contributors to each processor */ 17790ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 17800ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 17810ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 17820ac07820SSatish Balay for ( i=0; i<N; i++ ) { 17830ac07820SSatish Balay idx = rows[i]; 17840ac07820SSatish Balay found = 0; 17850ac07820SSatish Balay for ( j=0; j<size; j++ ) { 17860ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 17870ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 17880ac07820SSatish Balay } 17890ac07820SSatish Balay } 1790a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 17910ac07820SSatish Balay } 17920ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 17930ac07820SSatish Balay 17940ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 17950ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1796ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 17970ac07820SSatish Balay nrecvs = work[rank]; 1798ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 17990ac07820SSatish Balay nmax = work[rank]; 18000ac07820SSatish Balay PetscFree(work); 18010ac07820SSatish Balay 18020ac07820SSatish Balay /* post receives: */ 1803d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1804d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 18050ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1806ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 18070ac07820SSatish Balay } 18080ac07820SSatish Balay 18090ac07820SSatish Balay /* do sends: 18100ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 18110ac07820SSatish Balay the ith processor 18120ac07820SSatish Balay */ 18130ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1814ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 18150ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 18160ac07820SSatish Balay starts[0] = 0; 18170ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 18180ac07820SSatish Balay for ( i=0; i<N; i++ ) { 18190ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 18200ac07820SSatish Balay } 18210ac07820SSatish Balay ISRestoreIndices(is,&rows); 18220ac07820SSatish Balay 18230ac07820SSatish Balay starts[0] = 0; 18240ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 18250ac07820SSatish Balay count = 0; 18260ac07820SSatish Balay for ( i=0; i<size; i++ ) { 18270ac07820SSatish Balay if (procs[i]) { 1828ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 18290ac07820SSatish Balay } 18300ac07820SSatish Balay } 18310ac07820SSatish Balay PetscFree(starts); 18320ac07820SSatish Balay 18330ac07820SSatish Balay base = owners[rank]*bs; 18340ac07820SSatish Balay 18350ac07820SSatish Balay /* wait on receives */ 18360ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 18370ac07820SSatish Balay source = lens + nrecvs; 18380ac07820SSatish Balay count = nrecvs; slen = 0; 18390ac07820SSatish Balay while (count) { 1840ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 18410ac07820SSatish Balay /* unpack receives into our local space */ 1842ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 18430ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 18440ac07820SSatish Balay lens[imdex] = n; 18450ac07820SSatish Balay slen += n; 18460ac07820SSatish Balay count--; 18470ac07820SSatish Balay } 18480ac07820SSatish Balay PetscFree(recv_waits); 18490ac07820SSatish Balay 18500ac07820SSatish Balay /* move the data into the send scatter */ 18510ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 18520ac07820SSatish Balay count = 0; 18530ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 18540ac07820SSatish Balay values = rvalues + i*nmax; 18550ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 18560ac07820SSatish Balay lrows[count++] = values[j] - base; 18570ac07820SSatish Balay } 18580ac07820SSatish Balay } 18590ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 18600ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 18610ac07820SSatish Balay 18620ac07820SSatish Balay /* actually zap the local rows */ 1863029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 18640ac07820SSatish Balay PLogObjectParent(A,istmp); 1865a07cd24cSSatish Balay 186672dacd9aSBarry Smith /* 186772dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 186872dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 186972dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 187072dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 187172dacd9aSBarry Smith 187272dacd9aSBarry Smith Contributed by: Mathew Knepley 187372dacd9aSBarry Smith */ 18749c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 18750ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 18769c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 18779c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 18789c957beeSSatish Balay } else if (diag) { 18799c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1880fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1881fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1882fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18836525c446SSatish Balay } 1884a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1885a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1886a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1887a07cd24cSSatish Balay } 1888a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1889a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18909c957beeSSatish Balay } else { 18919c957beeSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1892a07cd24cSSatish Balay } 18939c957beeSSatish Balay 18949c957beeSSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 1895a07cd24cSSatish Balay PetscFree(lrows); 1896a07cd24cSSatish Balay 18970ac07820SSatish Balay /* wait on sends */ 18980ac07820SSatish Balay if (nsends) { 1899d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1900ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 19010ac07820SSatish Balay PetscFree(send_status); 19020ac07820SSatish Balay } 19030ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 19040ac07820SSatish Balay 19053a40ed3dSBarry Smith PetscFunctionReturn(0); 19060ac07820SSatish Balay } 190772dacd9aSBarry Smith 19085615d1e5SSatish Balay #undef __FUNC__ 19095615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1910ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1911ba4ca20aSSatish Balay { 1912ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 191325fdafccSSatish Balay MPI_Comm comm = A->comm; 1914133cdb44SSatish Balay static int called = 0; 19153a40ed3dSBarry Smith int ierr; 1916ba4ca20aSSatish Balay 1917d64ed03dSBarry Smith PetscFunctionBegin; 19183a40ed3dSBarry Smith if (!a->rank) { 19193a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 192025fdafccSSatish Balay } 192125fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1922133cdb44SSatish Balay (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1923133cdb44SSatish Balay (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 19243a40ed3dSBarry Smith PetscFunctionReturn(0); 1925ba4ca20aSSatish Balay } 19260ac07820SSatish Balay 19275615d1e5SSatish Balay #undef __FUNC__ 19285615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1929ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1930bb5a7306SBarry Smith { 1931bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1932bb5a7306SBarry Smith int ierr; 1933d64ed03dSBarry Smith 1934d64ed03dSBarry Smith PetscFunctionBegin; 1935bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 19363a40ed3dSBarry Smith PetscFunctionReturn(0); 1937bb5a7306SBarry Smith } 1938bb5a7306SBarry Smith 19392e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 19400ac07820SSatish Balay 194179bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1942cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1943cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1944cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1945cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1946cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1947cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 1948cc2dc46cSBarry Smith MatMultTrans_MPIBAIJ, 1949cc2dc46cSBarry Smith MatMultTransAdd_MPIBAIJ, 1950cc2dc46cSBarry Smith 0, 1951cc2dc46cSBarry Smith 0, 1952cc2dc46cSBarry Smith 0, 1953cc2dc46cSBarry Smith 0, 1954cc2dc46cSBarry Smith 0, 1955cc2dc46cSBarry Smith 0, 1956cc2dc46cSBarry Smith 0, 1957cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1958cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 1959cc2dc46cSBarry Smith 0, 1960cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1961cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1962cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1963cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1964cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1965cc2dc46cSBarry Smith 0, 1966cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1967cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1968cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1969cc2dc46cSBarry Smith 0, 1970cc2dc46cSBarry Smith 0, 1971cc2dc46cSBarry Smith 0, 1972cc2dc46cSBarry Smith 0, 1973cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1974cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1975cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1976cc2dc46cSBarry Smith 0, 1977cc2dc46cSBarry Smith 0, 1978cc2dc46cSBarry Smith 0, 1979cc2dc46cSBarry Smith 0, 19802e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1981cc2dc46cSBarry Smith 0, 1982cc2dc46cSBarry Smith 0, 1983cc2dc46cSBarry Smith 0, 1984cc2dc46cSBarry Smith 0, 1985cc2dc46cSBarry Smith 0, 1986cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1987cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1988cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1989cc2dc46cSBarry Smith 0, 1990cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1991cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1992cc2dc46cSBarry Smith 0, 1993cc2dc46cSBarry Smith 0, 1994cc2dc46cSBarry Smith 0, 1995cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1996cc2dc46cSBarry Smith 0, 1997cc2dc46cSBarry Smith 0, 1998cc2dc46cSBarry Smith 0, 1999cc2dc46cSBarry Smith 0, 2000cc2dc46cSBarry Smith 0, 2001cc2dc46cSBarry Smith 0, 2002cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 2003cc2dc46cSBarry Smith 0, 2004cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 2005cc2dc46cSBarry Smith 0, 2006cc2dc46cSBarry Smith 0, 2007cc2dc46cSBarry Smith 0, 2008cc2dc46cSBarry Smith MatGetMaps_Petsc}; 200979bdfe76SSatish Balay 20105ef9f2a5SBarry Smith 2011e18c124aSSatish Balay EXTERN_C_BEGIN 20125ef9f2a5SBarry Smith #undef __FUNC__ 20135ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 20145ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 20155ef9f2a5SBarry Smith { 20165ef9f2a5SBarry Smith PetscFunctionBegin; 20175ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 20185ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 20195ef9f2a5SBarry Smith PetscFunctionReturn(0); 20205ef9f2a5SBarry Smith } 2021e18c124aSSatish Balay EXTERN_C_END 202279bdfe76SSatish Balay 20235615d1e5SSatish Balay #undef __FUNC__ 20245615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 202579bdfe76SSatish Balay /*@C 202679bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 202779bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 202879bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 202979bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 203079bdfe76SSatish Balay performance can be increased by more than a factor of 50. 203179bdfe76SSatish Balay 2032db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2033db81eaa0SLois Curfman McInnes 203479bdfe76SSatish Balay Input Parameters: 2035db81eaa0SLois Curfman McInnes + comm - MPI communicator 203679bdfe76SSatish Balay . bs - size of blockk 203779bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 203892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 203992e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 204092e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 204192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 204292e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2043be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2044be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 204579bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 204679bdfe76SSatish Balay submatrix (same for all local rows) 204792e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 204892e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2049db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 205092e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 205179bdfe76SSatish Balay submatrix (same for all local rows). 2052db81eaa0SLois Curfman McInnes - o_nzz - array containing the number of nonzeros in the various block rows of the 205392e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 205492e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 205579bdfe76SSatish Balay 205679bdfe76SSatish Balay Output Parameter: 205779bdfe76SSatish Balay . A - the matrix 205879bdfe76SSatish Balay 2059db81eaa0SLois Curfman McInnes Options Database Keys: 2060db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2061db81eaa0SLois Curfman McInnes block calculations (much slower) 2062db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 2063494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 2064494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 20653ffaccefSLois Curfman McInnes 2066b259b22eSLois Curfman McInnes Notes: 206779bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 206879bdfe76SSatish Balay (possibly both). 206979bdfe76SSatish Balay 2070be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2071be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2072be79a94dSBarry Smith 207379bdfe76SSatish Balay Storage Information: 207479bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 207579bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 207679bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 207779bdfe76SSatish Balay local matrix (a rectangular submatrix). 207879bdfe76SSatish Balay 207979bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 208079bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 208179bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 208279bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 208379bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 208479bdfe76SSatish Balay 208579bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 208679bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 208779bdfe76SSatish Balay 2088db81eaa0SLois Curfman McInnes .vb 2089db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2090db81eaa0SLois Curfman McInnes ------------------- 2091db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2092db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2093db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2094db81eaa0SLois Curfman McInnes ------------------- 2095db81eaa0SLois Curfman McInnes .ve 209679bdfe76SSatish Balay 209779bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 209879bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 209979bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 210057b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 210179bdfe76SSatish Balay 2102d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2103d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 210479bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 210592e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 210692e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 21076da5968aSLois Curfman McInnes matrices. 210879bdfe76SSatish Balay 2109027ccd11SLois Curfman McInnes Level: intermediate 2110027ccd11SLois Curfman McInnes 211192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 211279bdfe76SSatish Balay 2113db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 211479bdfe76SSatish Balay @*/ 211579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 211679bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 211779bdfe76SSatish Balay { 211879bdfe76SSatish Balay Mat B; 211979bdfe76SSatish Balay Mat_MPIBAIJ *b; 2120133cdb44SSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 2121a2ab621fSBarry Smith int flag1 = 0,flag2 = 0; 212279bdfe76SSatish Balay 2123d64ed03dSBarry Smith PetscFunctionBegin; 2124a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 21253914022bSBarry Smith 21263914022bSBarry Smith MPI_Comm_size(comm,&size); 2127494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 2128494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 2129494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 21303914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 21313914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 21323914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 21333a40ed3dSBarry Smith PetscFunctionReturn(0); 21343914022bSBarry Smith } 21353914022bSBarry Smith 213679bdfe76SSatish Balay *A = 0; 21373f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 213879bdfe76SSatish Balay PLogObjectCreate(B); 213979bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 214079bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 2141cc2dc46cSBarry Smith PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 21424c50302cSBarry Smith 2143e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 2144e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 214590f02eecSBarry Smith B->mapping = 0; 214679bdfe76SSatish Balay B->factor = 0; 214779bdfe76SSatish Balay B->assembled = PETSC_FALSE; 214879bdfe76SSatish Balay 2149e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 215079bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 215179bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 215279bdfe76SSatish Balay 2153d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2154a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2155d64ed03dSBarry Smith } 2156a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2157a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2158a8c6a408SBarry Smith } 2159a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2160a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2161a8c6a408SBarry Smith } 2162cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2163cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 216479bdfe76SSatish Balay 216579bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 216679bdfe76SSatish Balay work[0] = m; work[1] = n; 216779bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 2168ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 216979bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 217079bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 217179bdfe76SSatish Balay } 217279bdfe76SSatish Balay if (m == PETSC_DECIDE) { 217379bdfe76SSatish Balay Mbs = M/bs; 2174a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 217579bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 217679bdfe76SSatish Balay m = mbs*bs; 217779bdfe76SSatish Balay } 217879bdfe76SSatish Balay if (n == PETSC_DECIDE) { 217979bdfe76SSatish Balay Nbs = N/bs; 2180a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 218179bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 218279bdfe76SSatish Balay n = nbs*bs; 218379bdfe76SSatish Balay } 2184a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 2185a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2186a8c6a408SBarry Smith } 218779bdfe76SSatish Balay 218879bdfe76SSatish Balay b->m = m; B->m = m; 218979bdfe76SSatish Balay b->n = n; B->n = n; 219079bdfe76SSatish Balay b->N = N; B->N = N; 219179bdfe76SSatish Balay b->M = M; B->M = M; 219279bdfe76SSatish Balay b->bs = bs; 219379bdfe76SSatish Balay b->bs2 = bs*bs; 219479bdfe76SSatish Balay b->mbs = mbs; 219579bdfe76SSatish Balay b->nbs = nbs; 219679bdfe76SSatish Balay b->Mbs = Mbs; 219779bdfe76SSatish Balay b->Nbs = Nbs; 219879bdfe76SSatish Balay 2199c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2200c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 2201488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2202488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2203c7fcc2eaSBarry Smith 220479bdfe76SSatish Balay /* build local table of row and column ownerships */ 220579bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2206f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 22070ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2208ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 220979bdfe76SSatish Balay b->rowners[0] = 0; 221079bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 221179bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 221279bdfe76SSatish Balay } 221379bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 221479bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 22154fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 22164fa0d573SSatish Balay b->rend_bs = b->rend * bs; 22174fa0d573SSatish Balay 2218ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 221979bdfe76SSatish Balay b->cowners[0] = 0; 222079bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 222179bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 222279bdfe76SSatish Balay } 222379bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 222479bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 22254fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 22264fa0d573SSatish Balay b->cend_bs = b->cend * bs; 222779bdfe76SSatish Balay 2228a07cd24cSSatish Balay 222979bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2230029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 223179bdfe76SSatish Balay PLogObjectParent(B,b->A); 223279bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2233029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 223479bdfe76SSatish Balay PLogObjectParent(B,b->B); 223579bdfe76SSatish Balay 223679bdfe76SSatish Balay /* build cache for off array entries formed */ 2237*bbb85fb3SSatish Balay ierr = StashCreate_Private(comm,bs,&b->stash); CHKERRQ(ierr); 223890f02eecSBarry Smith b->donotstash = 0; 223979bdfe76SSatish Balay b->colmap = 0; 224079bdfe76SSatish Balay b->garray = 0; 224179bdfe76SSatish Balay b->roworiented = 1; 224279bdfe76SSatish Balay 224330793edcSSatish Balay /* stuff used in block assembly */ 224430793edcSSatish Balay b->barray = 0; 224530793edcSSatish Balay 224679bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 224779bdfe76SSatish Balay b->lvec = 0; 224879bdfe76SSatish Balay b->Mvctx = 0; 224979bdfe76SSatish Balay 225079bdfe76SSatish Balay /* stuff for MatGetRow() */ 225179bdfe76SSatish Balay b->rowindices = 0; 225279bdfe76SSatish Balay b->rowvalues = 0; 225379bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 225479bdfe76SSatish Balay 2255a07cd24cSSatish Balay /* hash table stuff */ 2256a07cd24cSSatish Balay b->ht = 0; 2257187ce0cbSSatish Balay b->hd = 0; 22580bdbc534SSatish Balay b->ht_size = 0; 2259133cdb44SSatish Balay b->ht_flag = 0; 226025fdafccSSatish Balay b->ht_fact = 0; 2261187ce0cbSSatish Balay b->ht_total_ct = 0; 2262187ce0cbSSatish Balay b->ht_insert_ct = 0; 2263a07cd24cSSatish Balay 226479bdfe76SSatish Balay *A = B; 2265133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2266133cdb44SSatish Balay if (flg) { 2267133cdb44SSatish Balay double fact = 1.39; 2268133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2269133cdb44SSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2270133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2271133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2272133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2273133cdb44SSatish Balay } 22745ef9f2a5SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 22755ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 22765ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 22773a40ed3dSBarry Smith PetscFunctionReturn(0); 227879bdfe76SSatish Balay } 2279026e39d0SSatish Balay 22805615d1e5SSatish Balay #undef __FUNC__ 22812e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ" 22822e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 22830ac07820SSatish Balay { 22840ac07820SSatish Balay Mat mat; 22850ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 22860ac07820SSatish Balay int ierr, len=0, flg; 22870ac07820SSatish Balay 2288d64ed03dSBarry Smith PetscFunctionBegin; 22890ac07820SSatish Balay *newmat = 0; 22903f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 22910ac07820SSatish Balay PLogObjectCreate(mat); 22920ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2293cc2dc46cSBarry Smith PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2294e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2295e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 22960ac07820SSatish Balay mat->factor = matin->factor; 22970ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2298aef5e8e0SSatish Balay mat->insertmode = NOT_SET_VALUES; 22990ac07820SSatish Balay 23000ac07820SSatish Balay a->m = mat->m = oldmat->m; 23010ac07820SSatish Balay a->n = mat->n = oldmat->n; 23020ac07820SSatish Balay a->M = mat->M = oldmat->M; 23030ac07820SSatish Balay a->N = mat->N = oldmat->N; 23040ac07820SSatish Balay 23050ac07820SSatish Balay a->bs = oldmat->bs; 23060ac07820SSatish Balay a->bs2 = oldmat->bs2; 23070ac07820SSatish Balay a->mbs = oldmat->mbs; 23080ac07820SSatish Balay a->nbs = oldmat->nbs; 23090ac07820SSatish Balay a->Mbs = oldmat->Mbs; 23100ac07820SSatish Balay a->Nbs = oldmat->Nbs; 23110ac07820SSatish Balay 23120ac07820SSatish Balay a->rstart = oldmat->rstart; 23130ac07820SSatish Balay a->rend = oldmat->rend; 23140ac07820SSatish Balay a->cstart = oldmat->cstart; 23150ac07820SSatish Balay a->cend = oldmat->cend; 23160ac07820SSatish Balay a->size = oldmat->size; 23170ac07820SSatish Balay a->rank = oldmat->rank; 2318aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2319aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2320aef5e8e0SSatish Balay a->rowindices = 0; 23210ac07820SSatish Balay a->rowvalues = 0; 23220ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 232330793edcSSatish Balay a->barray = 0; 23243123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 23253123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 23263123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 23273123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 23280ac07820SSatish Balay 2329133cdb44SSatish Balay /* hash table stuff */ 2330133cdb44SSatish Balay a->ht = 0; 2331133cdb44SSatish Balay a->hd = 0; 2332133cdb44SSatish Balay a->ht_size = 0; 2333133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 233425fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2335133cdb44SSatish Balay a->ht_total_ct = 0; 2336133cdb44SSatish Balay a->ht_insert_ct = 0; 2337133cdb44SSatish Balay 2338133cdb44SSatish Balay 23390ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2340f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 23410ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 23420ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2343*bbb85fb3SSatish Balay ierr = StashCreate_Private(matin->comm,oldmat->bs,&a->stash); CHKERRQ(ierr); 23440ac07820SSatish Balay if (oldmat->colmap) { 234548e59246SSatish Balay #if defined (USE_CTABLE) 2346fa46199cSSatish Balay ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 234748e59246SSatish Balay #else 23480ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 23490ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 23500ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 235148e59246SSatish Balay #endif 23520ac07820SSatish Balay } else a->colmap = 0; 23530ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 23540ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 23550ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 23560ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 23570ac07820SSatish Balay } else a->garray = 0; 23580ac07820SSatish Balay 23590ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 23600ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 23610ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2362e18c124aSSatish Balay 23630ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 23642e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 23650ac07820SSatish Balay PLogObjectParent(mat,a->A); 23662e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 23670ac07820SSatish Balay PLogObjectParent(mat,a->B); 23680ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2369e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 23700ac07820SSatish Balay if (flg) { 23710ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 23720ac07820SSatish Balay } 23730ac07820SSatish Balay *newmat = mat; 23743a40ed3dSBarry Smith PetscFunctionReturn(0); 23750ac07820SSatish Balay } 237657b952d6SSatish Balay 237757b952d6SSatish Balay #include "sys.h" 237857b952d6SSatish Balay 23795615d1e5SSatish Balay #undef __FUNC__ 23805615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 238157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 238257b952d6SSatish Balay { 238357b952d6SSatish Balay Mat A; 238457b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 238557b952d6SSatish Balay Scalar *vals,*buf; 238657b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 238757b952d6SSatish Balay MPI_Status status; 2388cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 238957b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 239040011551SBarry Smith int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 239157b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 239257b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 239357b952d6SSatish Balay 2394d64ed03dSBarry Smith PetscFunctionBegin; 239557b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 239657b952d6SSatish Balay 239757b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 239857b952d6SSatish Balay if (!rank) { 239957b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2400e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2401a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2402d64ed03dSBarry Smith if (header[3] < 0) { 2403a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2404d64ed03dSBarry Smith } 24056c5fab8fSBarry Smith } 2406d64ed03dSBarry Smith 2407ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 240857b952d6SSatish Balay M = header[1]; N = header[2]; 240957b952d6SSatish Balay 2410a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 241157b952d6SSatish Balay 241257b952d6SSatish Balay /* 241357b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 241457b952d6SSatish Balay divisible by the blocksize 241557b952d6SSatish Balay */ 241657b952d6SSatish Balay Mbs = M/bs; 241757b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 241857b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 241957b952d6SSatish Balay else Mbs++; 242057b952d6SSatish Balay if (extra_rows &&!rank) { 2421b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 242257b952d6SSatish Balay } 2423537820f0SBarry Smith 242457b952d6SSatish Balay /* determine ownership of all rows */ 242557b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 242657b952d6SSatish Balay m = mbs * bs; 2427cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2428cee3aa6bSSatish Balay browners = rowners + size + 1; 2429ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 243057b952d6SSatish Balay rowners[0] = 0; 2431cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2432cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 243357b952d6SSatish Balay rstart = rowners[rank]; 243457b952d6SSatish Balay rend = rowners[rank+1]; 243557b952d6SSatish Balay 243657b952d6SSatish Balay /* distribute row lengths to all processors */ 243757b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 243857b952d6SSatish Balay if (!rank) { 243957b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2440e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 244157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 244257b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2443cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2444ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 244557b952d6SSatish Balay PetscFree(sndcounts); 2446d64ed03dSBarry Smith } else { 2447ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 244857b952d6SSatish Balay } 244957b952d6SSatish Balay 245057b952d6SSatish Balay if (!rank) { 245157b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 245257b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 245357b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 245457b952d6SSatish Balay for ( i=0; i<size; i++ ) { 245557b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 245657b952d6SSatish Balay procsnz[i] += rowlengths[j]; 245757b952d6SSatish Balay } 245857b952d6SSatish Balay } 245957b952d6SSatish Balay PetscFree(rowlengths); 246057b952d6SSatish Balay 246157b952d6SSatish Balay /* determine max buffer needed and allocate it */ 246257b952d6SSatish Balay maxnz = 0; 246357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 246457b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 246557b952d6SSatish Balay } 246657b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 246757b952d6SSatish Balay 246857b952d6SSatish Balay /* read in my part of the matrix column indices */ 246957b952d6SSatish Balay nz = procsnz[0]; 247057b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 247157b952d6SSatish Balay mycols = ibuf; 2472cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2473e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2474cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2475cee3aa6bSSatish Balay 247657b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 247757b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 247857b952d6SSatish Balay nz = procsnz[i]; 2479e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2480ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 248157b952d6SSatish Balay } 248257b952d6SSatish Balay /* read in the stuff for the last proc */ 248357b952d6SSatish Balay if ( size != 1 ) { 248457b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2485e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 248657b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2487ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 248857b952d6SSatish Balay } 248957b952d6SSatish Balay PetscFree(cols); 2490d64ed03dSBarry Smith } else { 249157b952d6SSatish Balay /* determine buffer space needed for message */ 249257b952d6SSatish Balay nz = 0; 249357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 249457b952d6SSatish Balay nz += locrowlens[i]; 249557b952d6SSatish Balay } 249657b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 249757b952d6SSatish Balay mycols = ibuf; 249857b952d6SSatish Balay /* receive message of column indices*/ 2499ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2500ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2501a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 250257b952d6SSatish Balay } 250357b952d6SSatish Balay 250457b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2505cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2506cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 250757b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2508cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 250957b952d6SSatish Balay masked1 = mask + Mbs; 251057b952d6SSatish Balay masked2 = masked1 + Mbs; 251157b952d6SSatish Balay rowcount = 0; nzcount = 0; 251257b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 251357b952d6SSatish Balay dcount = 0; 251457b952d6SSatish Balay odcount = 0; 251557b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 251657b952d6SSatish Balay kmax = locrowlens[rowcount]; 251757b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 251857b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 251957b952d6SSatish Balay if (!mask[tmp]) { 252057b952d6SSatish Balay mask[tmp] = 1; 252157b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 252257b952d6SSatish Balay else masked1[dcount++] = tmp; 252357b952d6SSatish Balay } 252457b952d6SSatish Balay } 252557b952d6SSatish Balay rowcount++; 252657b952d6SSatish Balay } 2527cee3aa6bSSatish Balay 252857b952d6SSatish Balay dlens[i] = dcount; 252957b952d6SSatish Balay odlens[i] = odcount; 2530cee3aa6bSSatish Balay 253157b952d6SSatish Balay /* zero out the mask elements we set */ 253257b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 253357b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 253457b952d6SSatish Balay } 2535cee3aa6bSSatish Balay 253657b952d6SSatish Balay /* create our matrix */ 2537537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2538537820f0SBarry Smith CHKERRQ(ierr); 253957b952d6SSatish Balay A = *newmat; 25406d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 254157b952d6SSatish Balay 254257b952d6SSatish Balay if (!rank) { 254357b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 254457b952d6SSatish Balay /* read in my part of the matrix numerical values */ 254557b952d6SSatish Balay nz = procsnz[0]; 254657b952d6SSatish Balay vals = buf; 2547cee3aa6bSSatish Balay mycols = ibuf; 2548cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2549e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2550cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2551537820f0SBarry Smith 255257b952d6SSatish Balay /* insert into matrix */ 255357b952d6SSatish Balay jj = rstart*bs; 255457b952d6SSatish Balay for ( i=0; i<m; i++ ) { 255557b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 255657b952d6SSatish Balay mycols += locrowlens[i]; 255757b952d6SSatish Balay vals += locrowlens[i]; 255857b952d6SSatish Balay jj++; 255957b952d6SSatish Balay } 256057b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 256157b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 256257b952d6SSatish Balay nz = procsnz[i]; 256357b952d6SSatish Balay vals = buf; 2564e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2565ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 256657b952d6SSatish Balay } 256757b952d6SSatish Balay /* the last proc */ 256857b952d6SSatish Balay if ( size != 1 ){ 256957b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2570cee3aa6bSSatish Balay vals = buf; 2571e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 257257b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2573ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 257457b952d6SSatish Balay } 257557b952d6SSatish Balay PetscFree(procsnz); 2576d64ed03dSBarry Smith } else { 257757b952d6SSatish Balay /* receive numeric values */ 257857b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 257957b952d6SSatish Balay 258057b952d6SSatish Balay /* receive message of values*/ 258157b952d6SSatish Balay vals = buf; 2582cee3aa6bSSatish Balay mycols = ibuf; 2583ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2584ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2585a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 258657b952d6SSatish Balay 258757b952d6SSatish Balay /* insert into matrix */ 258857b952d6SSatish Balay jj = rstart*bs; 2589cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 259057b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 259157b952d6SSatish Balay mycols += locrowlens[i]; 259257b952d6SSatish Balay vals += locrowlens[i]; 259357b952d6SSatish Balay jj++; 259457b952d6SSatish Balay } 259557b952d6SSatish Balay } 259657b952d6SSatish Balay PetscFree(locrowlens); 259757b952d6SSatish Balay PetscFree(buf); 259857b952d6SSatish Balay PetscFree(ibuf); 259957b952d6SSatish Balay PetscFree(rowners); 260057b952d6SSatish Balay PetscFree(dlens); 2601cee3aa6bSSatish Balay PetscFree(mask); 26026d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 26036d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 26043a40ed3dSBarry Smith PetscFunctionReturn(0); 260557b952d6SSatish Balay } 260657b952d6SSatish Balay 260757b952d6SSatish Balay 2608133cdb44SSatish Balay 2609133cdb44SSatish Balay #undef __FUNC__ 2610133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2611133cdb44SSatish Balay /*@ 2612133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2613133cdb44SSatish Balay 2614133cdb44SSatish Balay Input Parameters: 2615133cdb44SSatish Balay . mat - the matrix 2616133cdb44SSatish Balay . fact - factor 2617133cdb44SSatish Balay 2618fee21e36SBarry Smith Collective on Mat 2619fee21e36SBarry Smith 2620133cdb44SSatish Balay Notes: 2621133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2622133cdb44SSatish Balay 2623133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2624133cdb44SSatish Balay 2625133cdb44SSatish Balay .seealso: MatSetOption() 2626133cdb44SSatish Balay @*/ 2627133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2628133cdb44SSatish Balay { 262925fdafccSSatish Balay Mat_MPIBAIJ *baij; 2630133cdb44SSatish Balay 2631133cdb44SSatish Balay PetscFunctionBegin; 2632133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 263325fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2634133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2635133cdb44SSatish Balay } 2636133cdb44SSatish Balay baij = (Mat_MPIBAIJ*) mat->data; 2637133cdb44SSatish Balay baij->ht_fact = fact; 2638133cdb44SSatish Balay PetscFunctionReturn(0); 2639133cdb44SSatish Balay } 2640