179bdfe76SSatish Balay #ifndef lint 2*abef11f7SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.57 1997/03/13 16:30:14 curfman Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 243b2fbd54SBarry Smith 25537820f0SBarry Smith /* 26537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2957b952d6SSatish Balay length of colmap equals the global matrix length. 3057b952d6SSatish Balay */ 315615d1e5SSatish Balay #undef __FUNC__ 325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3457b952d6SSatish Balay { 3557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 37928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3857b952d6SSatish Balay 3957b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 4057b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4157b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 42928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4357b952d6SSatish Balay return 0; 4457b952d6SSatish Balay } 4557b952d6SSatish Balay 465615d1e5SSatish Balay #undef __FUNC__ 475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 5057b952d6SSatish Balay { 513b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5257b952d6SSatish Balay int ierr; 533b2fbd54SBarry Smith if (aij->size == 1) { 543b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 55e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 563b2fbd54SBarry Smith return 0; 573b2fbd54SBarry Smith } 583b2fbd54SBarry Smith 595615d1e5SSatish Balay #undef __FUNC__ 605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 623b2fbd54SBarry Smith PetscTruth *done) 633b2fbd54SBarry Smith { 643b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 653b2fbd54SBarry Smith int ierr; 663b2fbd54SBarry Smith if (aij->size == 1) { 673b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 68e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6957b952d6SSatish Balay return 0; 7057b952d6SSatish Balay } 7180c1aa95SSatish Balay #define CHUNKSIZE 10 7280c1aa95SSatish Balay 73f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 7480c1aa95SSatish Balay { \ 7580c1aa95SSatish Balay \ 7680c1aa95SSatish Balay brow = row/bs; \ 7780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 7880c1aa95SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; \ 7980c1aa95SSatish Balay bcol = col/bs; \ 8080c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 81ab26458aSBarry Smith low = 0; high = nrow; \ 82ab26458aSBarry Smith while (high-low > 3) { \ 83ab26458aSBarry Smith t = (low+high)/2; \ 84ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 85ab26458aSBarry Smith else low = t; \ 86ab26458aSBarry Smith } \ 87ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 8880c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 8980c1aa95SSatish Balay if (rp[_i] == bcol) { \ 9080c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 91eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 92eada6651SSatish Balay else *bap = value; \ 9380c1aa95SSatish Balay goto _noinsert; \ 9480c1aa95SSatish Balay } \ 9580c1aa95SSatish Balay } \ 960520107fSSatish Balay if (a->nonew) goto _noinsert; \ 9780c1aa95SSatish Balay if (nrow >= rmax) { \ 9880c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 9980c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 10080c1aa95SSatish Balay Scalar *new_a; \ 10180c1aa95SSatish Balay \ 10280c1aa95SSatish Balay /* malloc new storage space */ \ 10380c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 10480c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 10580c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 10680c1aa95SSatish Balay new_i = new_j + new_nz; \ 10780c1aa95SSatish Balay \ 10880c1aa95SSatish Balay /* copy over old data into new slots */ \ 10980c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 11080c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 11180c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 11280c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 11380c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 11480c1aa95SSatish Balay len*sizeof(int)); \ 11580c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 11680c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 11780c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 11880c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 11980c1aa95SSatish Balay /* free up old matrix storage */ \ 12080c1aa95SSatish Balay PetscFree(a->a); \ 12180c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 12280c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 12380c1aa95SSatish Balay a->singlemalloc = 1; \ 12480c1aa95SSatish Balay \ 12580c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 12680c1aa95SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; \ 12780c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 12880c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 12980c1aa95SSatish Balay a->reallocs++; \ 13080c1aa95SSatish Balay a->nz++; \ 13180c1aa95SSatish Balay } \ 13280c1aa95SSatish Balay N = nrow++ - 1; \ 13380c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 13480c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 13580c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 13680c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 13780c1aa95SSatish Balay } \ 13880c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 13980c1aa95SSatish Balay rp[_i] = bcol; \ 14080c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 14180c1aa95SSatish Balay _noinsert:; \ 14280c1aa95SSatish Balay ailen[brow] = nrow; \ 14380c1aa95SSatish Balay } 14457b952d6SSatish Balay 145639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1465615d1e5SSatish Balay #undef __FUNC__ 1475615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 148ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 14957b952d6SSatish Balay { 15057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 15157b952d6SSatish Balay Scalar value; 1524fa0d573SSatish Balay int ierr,i,j,row,col; 1534fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1544fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1554fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 15657b952d6SSatish Balay 157eada6651SSatish Balay /* Some Variables required in the macro */ 15880c1aa95SSatish Balay Mat A = baij->A; 15980c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 16080c1aa95SSatish Balay int *rp,ii,nrow,_i,rmax,N; 16180c1aa95SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 16280c1aa95SSatish Balay int *aj=a->j,brow,bcol; 163ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 16480c1aa95SSatish Balay Scalar *ap,*aa=a->a,*bap; 16580c1aa95SSatish Balay 16657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 167639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 168e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 169e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 170639f9d9dSBarry Smith #endif 17157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 17257b952d6SSatish Balay row = im[i] - rstart_orig; 17357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 17457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 17557b952d6SSatish Balay col = in[j] - cstart_orig; 17657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 177f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 17880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 17957b952d6SSatish Balay } 180639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 181e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 182e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 183639f9d9dSBarry Smith #endif 18457b952d6SSatish Balay else { 18557b952d6SSatish Balay if (mat->was_assembled) { 186905e6a2fSBarry Smith if (!baij->colmap) { 187905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 188905e6a2fSBarry Smith } 189905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 19057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 19157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 19257b952d6SSatish Balay col = in[j]; 19357b952d6SSatish Balay } 19457b952d6SSatish Balay } 19557b952d6SSatish Balay else col = in[j]; 19657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 197639f9d9dSBarry Smith ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 19857b952d6SSatish Balay } 19957b952d6SSatish Balay } 20057b952d6SSatish Balay } 20157b952d6SSatish Balay else { 20290f02eecSBarry Smith if (roworiented && !baij->donotstash) { 20357b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 20457b952d6SSatish Balay } 20557b952d6SSatish Balay else { 20690f02eecSBarry Smith if (!baij->donotstash) { 20757b952d6SSatish Balay row = im[i]; 20857b952d6SSatish Balay for ( j=0; j<n; j++ ) { 20957b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 21057b952d6SSatish Balay } 21157b952d6SSatish Balay } 21257b952d6SSatish Balay } 21357b952d6SSatish Balay } 21490f02eecSBarry Smith } 21557b952d6SSatish Balay return 0; 21657b952d6SSatish Balay } 21757b952d6SSatish Balay 218ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 219ab26458aSBarry Smith #undef __FUNC__ 220ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 221ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 222ab26458aSBarry Smith { 223ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 224ab26458aSBarry Smith Scalar *value,*tmp; 225*abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 226ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 227ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 228ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 229ab26458aSBarry Smith 230ab26458aSBarry Smith /* Should be stashed somewhere to avoid multiple mallocs */ 231ab26458aSBarry Smith tmp = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(tmp); 232ab26458aSBarry Smith if (roworiented) { 233ab26458aSBarry Smith stepval = (n-1)*bs; 234ab26458aSBarry Smith } else { 235ab26458aSBarry Smith stepval = (m-1)*bs; 236ab26458aSBarry Smith } 237ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 238ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 239ab26458aSBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 240ab26458aSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 241ab26458aSBarry Smith #endif 242ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 243ab26458aSBarry Smith row = im[i] - rstart; 244ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 245ab26458aSBarry Smith if (roworiented) { 246ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 247ab26458aSBarry Smith } else { 248ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 249*abef11f7SSatish Balay } 250ab26458aSBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) 251ab26458aSBarry Smith for (jj=0; jj<bs; jj++ ) 252ab26458aSBarry Smith *tmp++ = *value++; 253ab26458aSBarry Smith tmp -=bs2; 254*abef11f7SSatish Balay 255*abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 256*abef11f7SSatish Balay col = in[j] - cstart; 257ab26458aSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,tmp,addv);CHKERRQ(ierr); 258ab26458aSBarry Smith } 259ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 260ab26458aSBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 261ab26458aSBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");} 262ab26458aSBarry Smith #endif 263ab26458aSBarry Smith else { 264ab26458aSBarry Smith if (mat->was_assembled) { 265ab26458aSBarry Smith if (!baij->colmap) { 266ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 267ab26458aSBarry Smith } 268ab26458aSBarry Smith col = baij->colmap[in[j]] - 1; 269ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 270ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 271ab26458aSBarry Smith col = in[j]; 272ab26458aSBarry Smith } 273ab26458aSBarry Smith } 274ab26458aSBarry Smith else col = in[j]; 275ab26458aSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,tmp,addv);CHKERRQ(ierr); 276ab26458aSBarry Smith } 277ab26458aSBarry Smith } 278ab26458aSBarry Smith } 279ab26458aSBarry Smith else { 280ab26458aSBarry Smith if (!baij->donotstash) { 281ab26458aSBarry Smith if (roworiented ) { 282*abef11f7SSatish Balay row = im[i]*bs; 283*abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 284*abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 285*abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 286*abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 287*abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 288*abef11f7SSatish Balay } 289ab26458aSBarry Smith } 290ab26458aSBarry Smith } 291ab26458aSBarry Smith } 292ab26458aSBarry Smith else { 293ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 294*abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 295*abef11f7SSatish Balay col = in[j]*bs; 296*abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 297*abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 298*abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 299ab26458aSBarry Smith } 300ab26458aSBarry Smith } 301ab26458aSBarry Smith } 302*abef11f7SSatish Balay 303*abef11f7SSatish Balay } 304*abef11f7SSatish Balay } 305ab26458aSBarry Smith } 306ab26458aSBarry Smith } 307ab26458aSBarry Smith PetscFree(tmp); 308ab26458aSBarry Smith return 0; 309ab26458aSBarry Smith } 310ab26458aSBarry Smith 3115615d1e5SSatish Balay #undef __FUNC__ 3125615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 313ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 314d6de1c52SSatish Balay { 315d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 316d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 317d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 318d6de1c52SSatish Balay 319d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 320e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 321e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 322d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 323d6de1c52SSatish Balay row = idxm[i] - bsrstart; 324d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 325e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 326e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 327d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 328d6de1c52SSatish Balay col = idxn[j] - bscstart; 329d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 330d6de1c52SSatish Balay } 331d6de1c52SSatish Balay else { 332905e6a2fSBarry Smith if (!baij->colmap) { 333905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 334905e6a2fSBarry Smith } 335e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 336dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 337d9d09a02SSatish Balay else { 338dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 339d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 340d6de1c52SSatish Balay } 341d6de1c52SSatish Balay } 342d6de1c52SSatish Balay } 343d9d09a02SSatish Balay } 344d6de1c52SSatish Balay else { 345e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 346d6de1c52SSatish Balay } 347d6de1c52SSatish Balay } 348d6de1c52SSatish Balay return 0; 349d6de1c52SSatish Balay } 350d6de1c52SSatish Balay 3515615d1e5SSatish Balay #undef __FUNC__ 3525615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 353ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 354d6de1c52SSatish Balay { 355d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 356d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 357acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 358d6de1c52SSatish Balay double sum = 0.0; 359d6de1c52SSatish Balay Scalar *v; 360d6de1c52SSatish Balay 361d6de1c52SSatish Balay if (baij->size == 1) { 362d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 363d6de1c52SSatish Balay } else { 364d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 365d6de1c52SSatish Balay v = amat->a; 366d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 367d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 368d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 369d6de1c52SSatish Balay #else 370d6de1c52SSatish Balay sum += (*v)*(*v); v++; 371d6de1c52SSatish Balay #endif 372d6de1c52SSatish Balay } 373d6de1c52SSatish Balay v = bmat->a; 374d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 375d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 376d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 377d6de1c52SSatish Balay #else 378d6de1c52SSatish Balay sum += (*v)*(*v); v++; 379d6de1c52SSatish Balay #endif 380d6de1c52SSatish Balay } 381d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 382d6de1c52SSatish Balay *norm = sqrt(*norm); 383d6de1c52SSatish Balay } 384acdf5bf4SSatish Balay else 385e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 386d6de1c52SSatish Balay } 387d6de1c52SSatish Balay return 0; 388d6de1c52SSatish Balay } 38957b952d6SSatish Balay 3905615d1e5SSatish Balay #undef __FUNC__ 3915615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 392ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 39357b952d6SSatish Balay { 39457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 39557b952d6SSatish Balay MPI_Comm comm = mat->comm; 39657b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 39757b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 39857b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 39957b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 40057b952d6SSatish Balay InsertMode addv; 40157b952d6SSatish Balay Scalar *rvalues,*svalues; 40257b952d6SSatish Balay 40357b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 404e0fa3b82SLois Curfman McInnes MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 40557b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 406e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 40757b952d6SSatish Balay } 408e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 40957b952d6SSatish Balay 41057b952d6SSatish Balay /* first count number of contributors to each processor */ 41157b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 41257b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 41357b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 41457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 41557b952d6SSatish Balay idx = baij->stash.idx[i]; 41657b952d6SSatish Balay for ( j=0; j<size; j++ ) { 41757b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 41857b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 41957b952d6SSatish Balay } 42057b952d6SSatish Balay } 42157b952d6SSatish Balay } 42257b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 42357b952d6SSatish Balay 42457b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 42557b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 42657b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 42757b952d6SSatish Balay nreceives = work[rank]; 42857b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 42957b952d6SSatish Balay nmax = work[rank]; 43057b952d6SSatish Balay PetscFree(work); 43157b952d6SSatish Balay 43257b952d6SSatish Balay /* post receives: 43357b952d6SSatish Balay 1) each message will consist of ordered pairs 43457b952d6SSatish Balay (global index,value) we store the global index as a double 43557b952d6SSatish Balay to simplify the message passing. 43657b952d6SSatish Balay 2) since we don't know how long each individual message is we 43757b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 43857b952d6SSatish Balay this is a lot of wasted space. 43957b952d6SSatish Balay 44057b952d6SSatish Balay 44157b952d6SSatish Balay This could be done better. 44257b952d6SSatish Balay */ 44357b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 44457b952d6SSatish Balay CHKPTRQ(rvalues); 44557b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 44657b952d6SSatish Balay CHKPTRQ(recv_waits); 44757b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 44857b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 44957b952d6SSatish Balay comm,recv_waits+i); 45057b952d6SSatish Balay } 45157b952d6SSatish Balay 45257b952d6SSatish Balay /* do sends: 45357b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 45457b952d6SSatish Balay the ith processor 45557b952d6SSatish Balay */ 45657b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 45757b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 45857b952d6SSatish Balay CHKPTRQ(send_waits); 45957b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 46057b952d6SSatish Balay starts[0] = 0; 46157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 46257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 46357b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 46457b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 46557b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 46657b952d6SSatish Balay } 46757b952d6SSatish Balay PetscFree(owner); 46857b952d6SSatish Balay starts[0] = 0; 46957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 47057b952d6SSatish Balay count = 0; 47157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 47257b952d6SSatish Balay if (procs[i]) { 47357b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 47457b952d6SSatish Balay comm,send_waits+count++); 47557b952d6SSatish Balay } 47657b952d6SSatish Balay } 47757b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 47857b952d6SSatish Balay 47957b952d6SSatish Balay /* Free cache space */ 480d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 48157b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 48257b952d6SSatish Balay 48357b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 48457b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 48557b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 48657b952d6SSatish Balay baij->rmax = nmax; 48757b952d6SSatish Balay 48857b952d6SSatish Balay return 0; 48957b952d6SSatish Balay } 49057b952d6SSatish Balay 49157b952d6SSatish Balay 4925615d1e5SSatish Balay #undef __FUNC__ 4935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 494ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 49557b952d6SSatish Balay { 49657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 49757b952d6SSatish Balay MPI_Status *send_status,recv_status; 49857b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 49957b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 50057b952d6SSatish Balay Scalar *values,val; 501e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 50257b952d6SSatish Balay 50357b952d6SSatish Balay /* wait on receives */ 50457b952d6SSatish Balay while (count) { 50557b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 50657b952d6SSatish Balay /* unpack receives into our local space */ 50757b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 50857b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 50957b952d6SSatish Balay n = n/3; 51057b952d6SSatish Balay for ( i=0; i<n; i++ ) { 51157b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 51257b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 51357b952d6SSatish Balay val = values[3*i+2]; 51457b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 51557b952d6SSatish Balay col -= baij->cstart*bs; 51657b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 51757b952d6SSatish Balay } 51857b952d6SSatish Balay else { 51957b952d6SSatish Balay if (mat->was_assembled) { 520905e6a2fSBarry Smith if (!baij->colmap) { 521905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 522905e6a2fSBarry Smith } 523905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 52457b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 52557b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 52657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 52757b952d6SSatish Balay } 52857b952d6SSatish Balay } 52957b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 53057b952d6SSatish Balay } 53157b952d6SSatish Balay } 53257b952d6SSatish Balay count--; 53357b952d6SSatish Balay } 53457b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 53557b952d6SSatish Balay 53657b952d6SSatish Balay /* wait on sends */ 53757b952d6SSatish Balay if (baij->nsends) { 53857b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 53957b952d6SSatish Balay CHKPTRQ(send_status); 54057b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 54157b952d6SSatish Balay PetscFree(send_status); 54257b952d6SSatish Balay } 54357b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 54457b952d6SSatish Balay 54557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 54657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 54757b952d6SSatish Balay 54857b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 54957b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 55057b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 55157b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 55257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 55357b952d6SSatish Balay } 55457b952d6SSatish Balay 5556d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 55657b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 55757b952d6SSatish Balay } 55857b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 55957b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 56057b952d6SSatish Balay 56157b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 56257b952d6SSatish Balay return 0; 56357b952d6SSatish Balay } 56457b952d6SSatish Balay 5655615d1e5SSatish Balay #undef __FUNC__ 5665615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 56757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 56857b952d6SSatish Balay { 56957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 57057b952d6SSatish Balay int ierr; 57157b952d6SSatish Balay 57257b952d6SSatish Balay if (baij->size == 1) { 57357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 57457b952d6SSatish Balay } 575e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 57657b952d6SSatish Balay return 0; 57757b952d6SSatish Balay } 57857b952d6SSatish Balay 5795615d1e5SSatish Balay #undef __FUNC__ 5805615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 58157b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 58257b952d6SSatish Balay { 58357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 584cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 58557b952d6SSatish Balay FILE *fd; 58657b952d6SSatish Balay ViewerType vtype; 58757b952d6SSatish Balay 58857b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 58957b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 59057b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 591639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 5924e220ebcSLois Curfman McInnes MatInfo info; 59357b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 59457b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 5954e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 59657b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 59757b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 5984e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 5994e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 6004e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 6014e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 6024e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 6034e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 60457b952d6SSatish Balay fflush(fd); 60557b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 60657b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 60757b952d6SSatish Balay return 0; 60857b952d6SSatish Balay } 609639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 610bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 61157b952d6SSatish Balay return 0; 61257b952d6SSatish Balay } 61357b952d6SSatish Balay } 61457b952d6SSatish Balay 61557b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 61657b952d6SSatish Balay Draw draw; 61757b952d6SSatish Balay PetscTruth isnull; 61857b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 61957b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 62057b952d6SSatish Balay } 62157b952d6SSatish Balay 62257b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 62357b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 62457b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 62557b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 62657b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 62757b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 62857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 62957b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 63057b952d6SSatish Balay fflush(fd); 63157b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 63257b952d6SSatish Balay } 63357b952d6SSatish Balay else { 63457b952d6SSatish Balay int size = baij->size; 63557b952d6SSatish Balay rank = baij->rank; 63657b952d6SSatish Balay if (size == 1) { 63757b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 63857b952d6SSatish Balay } 63957b952d6SSatish Balay else { 64057b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 64157b952d6SSatish Balay Mat A; 64257b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 64357b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 64457b952d6SSatish Balay int mbs=baij->mbs; 64557b952d6SSatish Balay Scalar *a; 64657b952d6SSatish Balay 64757b952d6SSatish Balay if (!rank) { 648cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 64957b952d6SSatish Balay CHKERRQ(ierr); 65057b952d6SSatish Balay } 65157b952d6SSatish Balay else { 652cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 65357b952d6SSatish Balay CHKERRQ(ierr); 65457b952d6SSatish Balay } 65557b952d6SSatish Balay PLogObjectParent(mat,A); 65657b952d6SSatish Balay 65757b952d6SSatish Balay /* copy over the A part */ 65857b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 65957b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 66057b952d6SSatish Balay row = baij->rstart; 66157b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 66257b952d6SSatish Balay 66357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 66457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 66557b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 66657b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 66757b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 66857b952d6SSatish Balay for (k=0; k<bs; k++ ) { 669cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 670cee3aa6bSSatish Balay col++; a += bs; 67157b952d6SSatish Balay } 67257b952d6SSatish Balay } 67357b952d6SSatish Balay } 67457b952d6SSatish Balay /* copy over the B part */ 67557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 67657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 67757b952d6SSatish Balay row = baij->rstart*bs; 67857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 67957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 68057b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 68157b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 68257b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 68357b952d6SSatish Balay for (k=0; k<bs; k++ ) { 684cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 685cee3aa6bSSatish Balay col++; a += bs; 68657b952d6SSatish Balay } 68757b952d6SSatish Balay } 68857b952d6SSatish Balay } 68957b952d6SSatish Balay PetscFree(rvals); 6906d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6916d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 69257b952d6SSatish Balay if (!rank) { 69357b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 69457b952d6SSatish Balay } 69557b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 69657b952d6SSatish Balay } 69757b952d6SSatish Balay } 69857b952d6SSatish Balay return 0; 69957b952d6SSatish Balay } 70057b952d6SSatish Balay 70157b952d6SSatish Balay 70257b952d6SSatish Balay 7035615d1e5SSatish Balay #undef __FUNC__ 7045615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 705ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 70657b952d6SSatish Balay { 70757b952d6SSatish Balay Mat mat = (Mat) obj; 70857b952d6SSatish Balay int ierr; 70957b952d6SSatish Balay ViewerType vtype; 71057b952d6SSatish Balay 71157b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 71257b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 71357b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 71457b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 71557b952d6SSatish Balay } 71657b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 71757b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 71857b952d6SSatish Balay } 71957b952d6SSatish Balay return 0; 72057b952d6SSatish Balay } 72157b952d6SSatish Balay 7225615d1e5SSatish Balay #undef __FUNC__ 7235615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 724ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 72579bdfe76SSatish Balay { 72679bdfe76SSatish Balay Mat mat = (Mat) obj; 72779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 72879bdfe76SSatish Balay int ierr; 72979bdfe76SSatish Balay 73079bdfe76SSatish Balay #if defined(PETSC_LOG) 73179bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 73279bdfe76SSatish Balay #endif 73379bdfe76SSatish Balay 73479bdfe76SSatish Balay PetscFree(baij->rowners); 73579bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 73679bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 73779bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 73879bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 73979bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 74079bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 74179bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 74279bdfe76SSatish Balay PetscFree(baij); 74390f02eecSBarry Smith if (mat->mapping) { 74490f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 74590f02eecSBarry Smith } 74679bdfe76SSatish Balay PLogObjectDestroy(mat); 74779bdfe76SSatish Balay PetscHeaderDestroy(mat); 74879bdfe76SSatish Balay return 0; 74979bdfe76SSatish Balay } 75079bdfe76SSatish Balay 7515615d1e5SSatish Balay #undef __FUNC__ 7525615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 753ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 754cee3aa6bSSatish Balay { 755cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 75647b4a8eaSLois Curfman McInnes int ierr, nt; 757cee3aa6bSSatish Balay 758c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 75947b4a8eaSLois Curfman McInnes if (nt != a->n) { 760ab26458aSBarry Smith SETERRQ(1,0,"Incompatible partition of A and xx"); 76147b4a8eaSLois Curfman McInnes } 762c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 76347b4a8eaSLois Curfman McInnes if (nt != a->m) { 764e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 76547b4a8eaSLois Curfman McInnes } 76643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 767cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 76843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 769cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 77043a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 771cee3aa6bSSatish Balay return 0; 772cee3aa6bSSatish Balay } 773cee3aa6bSSatish Balay 7745615d1e5SSatish Balay #undef __FUNC__ 7755615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 776ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 777cee3aa6bSSatish Balay { 778cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 779cee3aa6bSSatish Balay int ierr; 78043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 781cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 78243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 783cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 784cee3aa6bSSatish Balay return 0; 785cee3aa6bSSatish Balay } 786cee3aa6bSSatish Balay 7875615d1e5SSatish Balay #undef __FUNC__ 7885615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 789ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 790cee3aa6bSSatish Balay { 791cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 792cee3aa6bSSatish Balay int ierr; 793cee3aa6bSSatish Balay 794cee3aa6bSSatish Balay /* do nondiagonal part */ 795cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 796cee3aa6bSSatish Balay /* send it on its way */ 797537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 798cee3aa6bSSatish Balay /* do local part */ 799cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 800cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 801cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 802cee3aa6bSSatish Balay /* but is not perhaps always true. */ 803639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 804cee3aa6bSSatish Balay return 0; 805cee3aa6bSSatish Balay } 806cee3aa6bSSatish Balay 8075615d1e5SSatish Balay #undef __FUNC__ 8085615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 809ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 810cee3aa6bSSatish Balay { 811cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 812cee3aa6bSSatish Balay int ierr; 813cee3aa6bSSatish Balay 814cee3aa6bSSatish Balay /* do nondiagonal part */ 815cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 816cee3aa6bSSatish Balay /* send it on its way */ 817537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 818cee3aa6bSSatish Balay /* do local part */ 819cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 820cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 821cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 822cee3aa6bSSatish Balay /* but is not perhaps always true. */ 823537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 824cee3aa6bSSatish Balay return 0; 825cee3aa6bSSatish Balay } 826cee3aa6bSSatish Balay 827cee3aa6bSSatish Balay /* 828cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 829cee3aa6bSSatish Balay diagonal block 830cee3aa6bSSatish Balay */ 8315615d1e5SSatish Balay #undef __FUNC__ 8325615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 833ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 834cee3aa6bSSatish Balay { 835cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 836cee3aa6bSSatish Balay if (a->M != a->N) 837e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 838cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 839cee3aa6bSSatish Balay } 840cee3aa6bSSatish Balay 8415615d1e5SSatish Balay #undef __FUNC__ 8425615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 843ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 844cee3aa6bSSatish Balay { 845cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 846cee3aa6bSSatish Balay int ierr; 847cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 848cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 849cee3aa6bSSatish Balay return 0; 850cee3aa6bSSatish Balay } 851026e39d0SSatish Balay 8525615d1e5SSatish Balay #undef __FUNC__ 8535615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 854ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 85557b952d6SSatish Balay { 85657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 85757b952d6SSatish Balay *m = mat->M; *n = mat->N; 85857b952d6SSatish Balay return 0; 85957b952d6SSatish Balay } 86057b952d6SSatish Balay 8615615d1e5SSatish Balay #undef __FUNC__ 8625615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 863ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 86457b952d6SSatish Balay { 86557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 86657b952d6SSatish Balay *m = mat->m; *n = mat->N; 86757b952d6SSatish Balay return 0; 86857b952d6SSatish Balay } 86957b952d6SSatish Balay 8705615d1e5SSatish Balay #undef __FUNC__ 8715615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 872ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 87357b952d6SSatish Balay { 87457b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 87557b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 87657b952d6SSatish Balay return 0; 87757b952d6SSatish Balay } 87857b952d6SSatish Balay 879acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 880acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 881acdf5bf4SSatish Balay 8825615d1e5SSatish Balay #undef __FUNC__ 8835615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 884acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 885acdf5bf4SSatish Balay { 886acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 887acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 888acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 889d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 890d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 891acdf5bf4SSatish Balay 892e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 893acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 894acdf5bf4SSatish Balay 895acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 896acdf5bf4SSatish Balay /* 897acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 898acdf5bf4SSatish Balay */ 899acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 900bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 901bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 902acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 903acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 904acdf5bf4SSatish Balay } 905acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 906acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 907acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 908acdf5bf4SSatish Balay } 909acdf5bf4SSatish Balay 910acdf5bf4SSatish Balay 911e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 912d9d09a02SSatish Balay lrow = row - brstart; 913acdf5bf4SSatish Balay 914acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 915acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 916acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 917acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 918acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 919acdf5bf4SSatish Balay nztot = nzA + nzB; 920acdf5bf4SSatish Balay 921acdf5bf4SSatish Balay cmap = mat->garray; 922acdf5bf4SSatish Balay if (v || idx) { 923acdf5bf4SSatish Balay if (nztot) { 924acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 925acdf5bf4SSatish Balay int imark = -1; 926acdf5bf4SSatish Balay if (v) { 927acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 928acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 929d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 930acdf5bf4SSatish Balay else break; 931acdf5bf4SSatish Balay } 932acdf5bf4SSatish Balay imark = i; 933acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 934acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 935acdf5bf4SSatish Balay } 936acdf5bf4SSatish Balay if (idx) { 937acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 938acdf5bf4SSatish Balay if (imark > -1) { 939acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 940bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 941acdf5bf4SSatish Balay } 942acdf5bf4SSatish Balay } else { 943acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 944d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 945d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 946acdf5bf4SSatish Balay else break; 947acdf5bf4SSatish Balay } 948acdf5bf4SSatish Balay imark = i; 949acdf5bf4SSatish Balay } 950d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 951d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 952acdf5bf4SSatish Balay } 953acdf5bf4SSatish Balay } 954d212a18eSSatish Balay else { 955d212a18eSSatish Balay if (idx) *idx = 0; 956d212a18eSSatish Balay if (v) *v = 0; 957d212a18eSSatish Balay } 958acdf5bf4SSatish Balay } 959acdf5bf4SSatish Balay *nz = nztot; 960acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 961acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 962acdf5bf4SSatish Balay return 0; 963acdf5bf4SSatish Balay } 964acdf5bf4SSatish Balay 9655615d1e5SSatish Balay #undef __FUNC__ 9665615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 967acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 968acdf5bf4SSatish Balay { 969acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 970acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 971e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 972acdf5bf4SSatish Balay } 973acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 974acdf5bf4SSatish Balay return 0; 975acdf5bf4SSatish Balay } 976acdf5bf4SSatish Balay 9775615d1e5SSatish Balay #undef __FUNC__ 9785615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 979ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 9805a838052SSatish Balay { 9815a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 9825a838052SSatish Balay *bs = baij->bs; 9835a838052SSatish Balay return 0; 9845a838052SSatish Balay } 9855a838052SSatish Balay 9865615d1e5SSatish Balay #undef __FUNC__ 9875615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 988ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 98958667388SSatish Balay { 99058667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 99158667388SSatish Balay int ierr; 99258667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 99358667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 99458667388SSatish Balay return 0; 99558667388SSatish Balay } 9960ac07820SSatish Balay 9975615d1e5SSatish Balay #undef __FUNC__ 9985615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 999ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 10000ac07820SSatish Balay { 10014e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 10024e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 10037d57db60SLois Curfman McInnes int ierr; 10047d57db60SLois Curfman McInnes double isend[5], irecv[5]; 10050ac07820SSatish Balay 10064e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 10074e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 10084e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 10094e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 10104e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 10114e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10124e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 10134e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10144e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 10150ac07820SSatish Balay if (flag == MAT_LOCAL) { 10164e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10174e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10184e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 10194e220ebcSLois Curfman McInnes info->memory = isend[3]; 10204e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 10210ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1022dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 10234e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10244e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10254e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10264e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10274e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 10280ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1029dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 10304e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 10314e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 10324e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 10334e220ebcSLois Curfman McInnes info->memory = irecv[3]; 10344e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 10350ac07820SSatish Balay } 10364e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 10374e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 10384e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 10390ac07820SSatish Balay return 0; 10400ac07820SSatish Balay } 10410ac07820SSatish Balay 10425615d1e5SSatish Balay #undef __FUNC__ 10435615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1044ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 104558667388SSatish Balay { 104658667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 104758667388SSatish Balay 104858667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 104958667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 10506da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1051c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 1052c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1053b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1054b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1055b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1056aeafbbfcSLois Curfman McInnes a->roworiented = 1; 105758667388SSatish Balay MatSetOption(a->A,op); 105858667388SSatish Balay MatSetOption(a->B,op); 1059b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 10606da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 106158667388SSatish Balay op == MAT_SYMMETRIC || 106258667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 106358667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 106458667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 106558667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 106658667388SSatish Balay a->roworiented = 0; 106758667388SSatish Balay MatSetOption(a->A,op); 106858667388SSatish Balay MatSetOption(a->B,op); 106990f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 107090f02eecSBarry Smith a->donotstash = 1; 107190f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1072e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 107358667388SSatish Balay else 1074e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 107558667388SSatish Balay return 0; 107658667388SSatish Balay } 107758667388SSatish Balay 10785615d1e5SSatish Balay #undef __FUNC__ 10795615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1080ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 10810ac07820SSatish Balay { 10820ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 10830ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 10840ac07820SSatish Balay Mat B; 10850ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 10860ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 10870ac07820SSatish Balay Scalar *a; 10880ac07820SSatish Balay 10890ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 1090e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 10910ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 10920ac07820SSatish Balay CHKERRQ(ierr); 10930ac07820SSatish Balay 10940ac07820SSatish Balay /* copy over the A part */ 10950ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 10960ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 10970ac07820SSatish Balay row = baij->rstart; 10980ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 10990ac07820SSatish Balay 11000ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 11010ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 11020ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 11030ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 11040ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 11050ac07820SSatish Balay for (k=0; k<bs; k++ ) { 11060ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 11070ac07820SSatish Balay col++; a += bs; 11080ac07820SSatish Balay } 11090ac07820SSatish Balay } 11100ac07820SSatish Balay } 11110ac07820SSatish Balay /* copy over the B part */ 11120ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 11130ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 11140ac07820SSatish Balay row = baij->rstart*bs; 11150ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 11160ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 11170ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 11180ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 11190ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 11200ac07820SSatish Balay for (k=0; k<bs; k++ ) { 11210ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 11220ac07820SSatish Balay col++; a += bs; 11230ac07820SSatish Balay } 11240ac07820SSatish Balay } 11250ac07820SSatish Balay } 11260ac07820SSatish Balay PetscFree(rvals); 11270ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11280ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11290ac07820SSatish Balay 11300ac07820SSatish Balay if (matout != PETSC_NULL) { 11310ac07820SSatish Balay *matout = B; 11320ac07820SSatish Balay } else { 11330ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 11340ac07820SSatish Balay PetscFree(baij->rowners); 11350ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 11360ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 11370ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 11380ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 11390ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 11400ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 11410ac07820SSatish Balay PetscFree(baij); 11420ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 11430ac07820SSatish Balay PetscHeaderDestroy(B); 11440ac07820SSatish Balay } 11450ac07820SSatish Balay return 0; 11460ac07820SSatish Balay } 11470e95ebc0SSatish Balay 11485615d1e5SSatish Balay #undef __FUNC__ 11495615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 11500e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 11510e95ebc0SSatish Balay { 11520e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 11530e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 11540e95ebc0SSatish Balay int ierr,s1,s2,s3; 11550e95ebc0SSatish Balay 11560e95ebc0SSatish Balay if (ll) { 11570e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 11580e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1159e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 11600e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 11610e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 11620e95ebc0SSatish Balay } 1163e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 11640e95ebc0SSatish Balay return 0; 11650e95ebc0SSatish Balay } 11660e95ebc0SSatish Balay 11670ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 11680ac07820SSatish Balay matrix is square and the column and row owerships are identical. 11690ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 11700ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 11710ac07820SSatish Balay routine. 11720ac07820SSatish Balay */ 11735615d1e5SSatish Balay #undef __FUNC__ 11745615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1175ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 11760ac07820SSatish Balay { 11770ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 11780ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 11790ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 11800ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 11810ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 11820ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 11830ac07820SSatish Balay MPI_Comm comm = A->comm; 11840ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 11850ac07820SSatish Balay MPI_Status recv_status,*send_status; 11860ac07820SSatish Balay IS istmp; 11870ac07820SSatish Balay 11880ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 11890ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 11900ac07820SSatish Balay 11910ac07820SSatish Balay /* first count number of contributors to each processor */ 11920ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 11930ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 11940ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 11950ac07820SSatish Balay for ( i=0; i<N; i++ ) { 11960ac07820SSatish Balay idx = rows[i]; 11970ac07820SSatish Balay found = 0; 11980ac07820SSatish Balay for ( j=0; j<size; j++ ) { 11990ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 12000ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 12010ac07820SSatish Balay } 12020ac07820SSatish Balay } 1203e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 12040ac07820SSatish Balay } 12050ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 12060ac07820SSatish Balay 12070ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 12080ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 12090ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 12100ac07820SSatish Balay nrecvs = work[rank]; 12110ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 12120ac07820SSatish Balay nmax = work[rank]; 12130ac07820SSatish Balay PetscFree(work); 12140ac07820SSatish Balay 12150ac07820SSatish Balay /* post receives: */ 12160ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 12170ac07820SSatish Balay CHKPTRQ(rvalues); 12180ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 12190ac07820SSatish Balay CHKPTRQ(recv_waits); 12200ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 12210ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 12220ac07820SSatish Balay } 12230ac07820SSatish Balay 12240ac07820SSatish Balay /* do sends: 12250ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 12260ac07820SSatish Balay the ith processor 12270ac07820SSatish Balay */ 12280ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 12290ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 12300ac07820SSatish Balay CHKPTRQ(send_waits); 12310ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 12320ac07820SSatish Balay starts[0] = 0; 12330ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 12340ac07820SSatish Balay for ( i=0; i<N; i++ ) { 12350ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 12360ac07820SSatish Balay } 12370ac07820SSatish Balay ISRestoreIndices(is,&rows); 12380ac07820SSatish Balay 12390ac07820SSatish Balay starts[0] = 0; 12400ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 12410ac07820SSatish Balay count = 0; 12420ac07820SSatish Balay for ( i=0; i<size; i++ ) { 12430ac07820SSatish Balay if (procs[i]) { 12440ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 12450ac07820SSatish Balay } 12460ac07820SSatish Balay } 12470ac07820SSatish Balay PetscFree(starts); 12480ac07820SSatish Balay 12490ac07820SSatish Balay base = owners[rank]*bs; 12500ac07820SSatish Balay 12510ac07820SSatish Balay /* wait on receives */ 12520ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 12530ac07820SSatish Balay source = lens + nrecvs; 12540ac07820SSatish Balay count = nrecvs; slen = 0; 12550ac07820SSatish Balay while (count) { 12560ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 12570ac07820SSatish Balay /* unpack receives into our local space */ 12580ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 12590ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 12600ac07820SSatish Balay lens[imdex] = n; 12610ac07820SSatish Balay slen += n; 12620ac07820SSatish Balay count--; 12630ac07820SSatish Balay } 12640ac07820SSatish Balay PetscFree(recv_waits); 12650ac07820SSatish Balay 12660ac07820SSatish Balay /* move the data into the send scatter */ 12670ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 12680ac07820SSatish Balay count = 0; 12690ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 12700ac07820SSatish Balay values = rvalues + i*nmax; 12710ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 12720ac07820SSatish Balay lrows[count++] = values[j] - base; 12730ac07820SSatish Balay } 12740ac07820SSatish Balay } 12750ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 12760ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 12770ac07820SSatish Balay 12780ac07820SSatish Balay /* actually zap the local rows */ 1279537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 12800ac07820SSatish Balay PLogObjectParent(A,istmp); 12810ac07820SSatish Balay PetscFree(lrows); 12820ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 12830ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 12840ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 12850ac07820SSatish Balay 12860ac07820SSatish Balay /* wait on sends */ 12870ac07820SSatish Balay if (nsends) { 12880ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 12890ac07820SSatish Balay CHKPTRQ(send_status); 12900ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 12910ac07820SSatish Balay PetscFree(send_status); 12920ac07820SSatish Balay } 12930ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 12940ac07820SSatish Balay 12950ac07820SSatish Balay return 0; 12960ac07820SSatish Balay } 1297ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 12985615d1e5SSatish Balay #undef __FUNC__ 12995615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1300ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1301ba4ca20aSSatish Balay { 1302ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1303ba4ca20aSSatish Balay 1304ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1305ba4ca20aSSatish Balay else return 0; 1306ba4ca20aSSatish Balay } 13070ac07820SSatish Balay 13085615d1e5SSatish Balay #undef __FUNC__ 13095615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1310ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1311bb5a7306SBarry Smith { 1312bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1313bb5a7306SBarry Smith int ierr; 1314bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1315bb5a7306SBarry Smith return 0; 1316bb5a7306SBarry Smith } 1317bb5a7306SBarry Smith 13180ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 13190ac07820SSatish Balay 132079bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 132179bdfe76SSatish Balay static struct _MatOps MatOps = { 1322bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 13234c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 13244c50302cSBarry Smith 0,0,0,0, 13250ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 13260e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 132758667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 13284c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 13294c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 13304c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 133194a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1332d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1333ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1334bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1335ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 133679bdfe76SSatish Balay 133779bdfe76SSatish Balay 13385615d1e5SSatish Balay #undef __FUNC__ 13395615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 134079bdfe76SSatish Balay /*@C 134179bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 134279bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 134379bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 134479bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 134579bdfe76SSatish Balay performance can be increased by more than a factor of 50. 134679bdfe76SSatish Balay 134779bdfe76SSatish Balay Input Parameters: 134879bdfe76SSatish Balay . comm - MPI communicator 134979bdfe76SSatish Balay . bs - size of blockk 135079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 135192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 135292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 135392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 135492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 135592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 135679bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 135792e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 135879bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 135979bdfe76SSatish Balay submatrix (same for all local rows) 136092e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 136192e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 136292e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 136392e8d321SLois Curfman McInnes it is zero. 136492e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 136579bdfe76SSatish Balay submatrix (same for all local rows). 136692e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 136792e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 136892e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 136979bdfe76SSatish Balay 137079bdfe76SSatish Balay Output Parameter: 137179bdfe76SSatish Balay . A - the matrix 137279bdfe76SSatish Balay 137379bdfe76SSatish Balay Notes: 137479bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 137579bdfe76SSatish Balay (possibly both). 137679bdfe76SSatish Balay 137779bdfe76SSatish Balay Storage Information: 137879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 137979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 138079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 138179bdfe76SSatish Balay local matrix (a rectangular submatrix). 138279bdfe76SSatish Balay 138379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 138479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 138579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 138679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 138779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 138879bdfe76SSatish Balay 138979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 139079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 139179bdfe76SSatish Balay 139279bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 139379bdfe76SSatish Balay $ ------------------- 139479bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 139579bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 139679bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 139779bdfe76SSatish Balay $ ------------------- 139879bdfe76SSatish Balay $ 139979bdfe76SSatish Balay 140079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 140179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 140279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 140357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 140479bdfe76SSatish Balay 140579bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 140679bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 140779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 140892e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 140992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 14106da5968aSLois Curfman McInnes matrices. 141179bdfe76SSatish Balay 141292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 141379bdfe76SSatish Balay 141479bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 141579bdfe76SSatish Balay @*/ 141679bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 141779bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 141879bdfe76SSatish Balay { 141979bdfe76SSatish Balay Mat B; 142079bdfe76SSatish Balay Mat_MPIBAIJ *b; 14214c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 142279bdfe76SSatish Balay 1423e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 142479bdfe76SSatish Balay *A = 0; 142579bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 142679bdfe76SSatish Balay PLogObjectCreate(B); 142779bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 142879bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 142979bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 14304c50302cSBarry Smith MPI_Comm_size(comm,&size); 14314c50302cSBarry Smith if (size == 1) { 14324c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 14334c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 14344c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 14354c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 14364c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 14374c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 14384c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 14394c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 14404c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 14414c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 14424c50302cSBarry Smith } 14434c50302cSBarry Smith 144479bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 144579bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 144690f02eecSBarry Smith B->mapping = 0; 144779bdfe76SSatish Balay B->factor = 0; 144879bdfe76SSatish Balay B->assembled = PETSC_FALSE; 144979bdfe76SSatish Balay 1450e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 145179bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 145279bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 145379bdfe76SSatish Balay 145479bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1455e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1456e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1457e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1458cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1459cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 146079bdfe76SSatish Balay 146179bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 146279bdfe76SSatish Balay work[0] = m; work[1] = n; 146379bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 146479bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 146579bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 146679bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 146779bdfe76SSatish Balay } 146879bdfe76SSatish Balay if (m == PETSC_DECIDE) { 146979bdfe76SSatish Balay Mbs = M/bs; 1470e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 147179bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 147279bdfe76SSatish Balay m = mbs*bs; 147379bdfe76SSatish Balay } 147479bdfe76SSatish Balay if (n == PETSC_DECIDE) { 147579bdfe76SSatish Balay Nbs = N/bs; 1476e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 147779bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 147879bdfe76SSatish Balay n = nbs*bs; 147979bdfe76SSatish Balay } 1480e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 148179bdfe76SSatish Balay 148279bdfe76SSatish Balay b->m = m; B->m = m; 148379bdfe76SSatish Balay b->n = n; B->n = n; 148479bdfe76SSatish Balay b->N = N; B->N = N; 148579bdfe76SSatish Balay b->M = M; B->M = M; 148679bdfe76SSatish Balay b->bs = bs; 148779bdfe76SSatish Balay b->bs2 = bs*bs; 148879bdfe76SSatish Balay b->mbs = mbs; 148979bdfe76SSatish Balay b->nbs = nbs; 149079bdfe76SSatish Balay b->Mbs = Mbs; 149179bdfe76SSatish Balay b->Nbs = Nbs; 149279bdfe76SSatish Balay 149379bdfe76SSatish Balay /* build local table of row and column ownerships */ 149479bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 149579bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 14960ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 149779bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 149879bdfe76SSatish Balay b->rowners[0] = 0; 149979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 150079bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 150179bdfe76SSatish Balay } 150279bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 150379bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 15044fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 15054fa0d573SSatish Balay b->rend_bs = b->rend * bs; 15064fa0d573SSatish Balay 150779bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 150879bdfe76SSatish Balay b->cowners[0] = 0; 150979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 151079bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 151179bdfe76SSatish Balay } 151279bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 151379bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 15144fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 15154fa0d573SSatish Balay b->cend_bs = b->cend * bs; 151679bdfe76SSatish Balay 151779bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 151879bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 151979bdfe76SSatish Balay PLogObjectParent(B,b->A); 152079bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 152179bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 152279bdfe76SSatish Balay PLogObjectParent(B,b->B); 152379bdfe76SSatish Balay 152479bdfe76SSatish Balay /* build cache for off array entries formed */ 152579bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 152690f02eecSBarry Smith b->donotstash = 0; 152779bdfe76SSatish Balay b->colmap = 0; 152879bdfe76SSatish Balay b->garray = 0; 152979bdfe76SSatish Balay b->roworiented = 1; 153079bdfe76SSatish Balay 153179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 153279bdfe76SSatish Balay b->lvec = 0; 153379bdfe76SSatish Balay b->Mvctx = 0; 153479bdfe76SSatish Balay 153579bdfe76SSatish Balay /* stuff for MatGetRow() */ 153679bdfe76SSatish Balay b->rowindices = 0; 153779bdfe76SSatish Balay b->rowvalues = 0; 153879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 153979bdfe76SSatish Balay 154079bdfe76SSatish Balay *A = B; 154179bdfe76SSatish Balay return 0; 154279bdfe76SSatish Balay } 1543026e39d0SSatish Balay 15445615d1e5SSatish Balay #undef __FUNC__ 15455615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 15460ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 15470ac07820SSatish Balay { 15480ac07820SSatish Balay Mat mat; 15490ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 15500ac07820SSatish Balay int ierr, len=0, flg; 15510ac07820SSatish Balay 15520ac07820SSatish Balay *newmat = 0; 15530ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 15540ac07820SSatish Balay PLogObjectCreate(mat); 15550ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 15560ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 15570ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 15580ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 15590ac07820SSatish Balay mat->factor = matin->factor; 15600ac07820SSatish Balay mat->assembled = PETSC_TRUE; 15610ac07820SSatish Balay 15620ac07820SSatish Balay a->m = mat->m = oldmat->m; 15630ac07820SSatish Balay a->n = mat->n = oldmat->n; 15640ac07820SSatish Balay a->M = mat->M = oldmat->M; 15650ac07820SSatish Balay a->N = mat->N = oldmat->N; 15660ac07820SSatish Balay 15670ac07820SSatish Balay a->bs = oldmat->bs; 15680ac07820SSatish Balay a->bs2 = oldmat->bs2; 15690ac07820SSatish Balay a->mbs = oldmat->mbs; 15700ac07820SSatish Balay a->nbs = oldmat->nbs; 15710ac07820SSatish Balay a->Mbs = oldmat->Mbs; 15720ac07820SSatish Balay a->Nbs = oldmat->Nbs; 15730ac07820SSatish Balay 15740ac07820SSatish Balay a->rstart = oldmat->rstart; 15750ac07820SSatish Balay a->rend = oldmat->rend; 15760ac07820SSatish Balay a->cstart = oldmat->cstart; 15770ac07820SSatish Balay a->cend = oldmat->cend; 15780ac07820SSatish Balay a->size = oldmat->size; 15790ac07820SSatish Balay a->rank = oldmat->rank; 1580e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 15810ac07820SSatish Balay a->rowvalues = 0; 15820ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 15830ac07820SSatish Balay 15840ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 15850ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 15860ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 15870ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 15880ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 15890ac07820SSatish Balay if (oldmat->colmap) { 15900ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 15910ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 15920ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 15930ac07820SSatish Balay } else a->colmap = 0; 15940ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 15950ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 15960ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 15970ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 15980ac07820SSatish Balay } else a->garray = 0; 15990ac07820SSatish Balay 16000ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 16010ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 16020ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 16030ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 16040ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 16050ac07820SSatish Balay PLogObjectParent(mat,a->A); 16060ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 16070ac07820SSatish Balay PLogObjectParent(mat,a->B); 16080ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 16090ac07820SSatish Balay if (flg) { 16100ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 16110ac07820SSatish Balay } 16120ac07820SSatish Balay *newmat = mat; 16130ac07820SSatish Balay return 0; 16140ac07820SSatish Balay } 161557b952d6SSatish Balay 161657b952d6SSatish Balay #include "sys.h" 161757b952d6SSatish Balay 16185615d1e5SSatish Balay #undef __FUNC__ 16195615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 162057b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 162157b952d6SSatish Balay { 162257b952d6SSatish Balay Mat A; 162357b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 162457b952d6SSatish Balay Scalar *vals,*buf; 162557b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 162657b952d6SSatish Balay MPI_Status status; 1627cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 162857b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 162957b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 163057b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 163157b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 163257b952d6SSatish Balay 163357b952d6SSatish Balay 163457b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 163557b952d6SSatish Balay bs2 = bs*bs; 163657b952d6SSatish Balay 163757b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 163857b952d6SSatish Balay if (!rank) { 163957b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 164057b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1641e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 164257b952d6SSatish Balay } 164357b952d6SSatish Balay 164457b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 164557b952d6SSatish Balay M = header[1]; N = header[2]; 164657b952d6SSatish Balay 1647e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 164857b952d6SSatish Balay 164957b952d6SSatish Balay /* 165057b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 165157b952d6SSatish Balay divisible by the blocksize 165257b952d6SSatish Balay */ 165357b952d6SSatish Balay Mbs = M/bs; 165457b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 165557b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 165657b952d6SSatish Balay else Mbs++; 165757b952d6SSatish Balay if (extra_rows &&!rank) { 1658b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 165957b952d6SSatish Balay } 1660537820f0SBarry Smith 166157b952d6SSatish Balay /* determine ownership of all rows */ 166257b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 166357b952d6SSatish Balay m = mbs * bs; 1664cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1665cee3aa6bSSatish Balay browners = rowners + size + 1; 166657b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 166757b952d6SSatish Balay rowners[0] = 0; 1668cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1669cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 167057b952d6SSatish Balay rstart = rowners[rank]; 167157b952d6SSatish Balay rend = rowners[rank+1]; 167257b952d6SSatish Balay 167357b952d6SSatish Balay /* distribute row lengths to all processors */ 167457b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 167557b952d6SSatish Balay if (!rank) { 167657b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 167757b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 167857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 167957b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1680cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1681cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 168257b952d6SSatish Balay PetscFree(sndcounts); 168357b952d6SSatish Balay } 168457b952d6SSatish Balay else { 168557b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 168657b952d6SSatish Balay } 168757b952d6SSatish Balay 168857b952d6SSatish Balay if (!rank) { 168957b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 169057b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 169157b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 169257b952d6SSatish Balay for ( i=0; i<size; i++ ) { 169357b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 169457b952d6SSatish Balay procsnz[i] += rowlengths[j]; 169557b952d6SSatish Balay } 169657b952d6SSatish Balay } 169757b952d6SSatish Balay PetscFree(rowlengths); 169857b952d6SSatish Balay 169957b952d6SSatish Balay /* determine max buffer needed and allocate it */ 170057b952d6SSatish Balay maxnz = 0; 170157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 170257b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 170357b952d6SSatish Balay } 170457b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 170557b952d6SSatish Balay 170657b952d6SSatish Balay /* read in my part of the matrix column indices */ 170757b952d6SSatish Balay nz = procsnz[0]; 170857b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 170957b952d6SSatish Balay mycols = ibuf; 1710cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 171157b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1712cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1713cee3aa6bSSatish Balay 171457b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 171557b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 171657b952d6SSatish Balay nz = procsnz[i]; 171757b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 171857b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 171957b952d6SSatish Balay } 172057b952d6SSatish Balay /* read in the stuff for the last proc */ 172157b952d6SSatish Balay if ( size != 1 ) { 172257b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 172357b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 172457b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1725cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 172657b952d6SSatish Balay } 172757b952d6SSatish Balay PetscFree(cols); 172857b952d6SSatish Balay } 172957b952d6SSatish Balay else { 173057b952d6SSatish Balay /* determine buffer space needed for message */ 173157b952d6SSatish Balay nz = 0; 173257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 173357b952d6SSatish Balay nz += locrowlens[i]; 173457b952d6SSatish Balay } 173557b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 173657b952d6SSatish Balay mycols = ibuf; 173757b952d6SSatish Balay /* receive message of column indices*/ 173857b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 173957b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1740e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 174157b952d6SSatish Balay } 174257b952d6SSatish Balay 174357b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1744cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1745cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 174657b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1747cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 174857b952d6SSatish Balay masked1 = mask + Mbs; 174957b952d6SSatish Balay masked2 = masked1 + Mbs; 175057b952d6SSatish Balay rowcount = 0; nzcount = 0; 175157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 175257b952d6SSatish Balay dcount = 0; 175357b952d6SSatish Balay odcount = 0; 175457b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 175557b952d6SSatish Balay kmax = locrowlens[rowcount]; 175657b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 175757b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 175857b952d6SSatish Balay if (!mask[tmp]) { 175957b952d6SSatish Balay mask[tmp] = 1; 176057b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 176157b952d6SSatish Balay else masked1[dcount++] = tmp; 176257b952d6SSatish Balay } 176357b952d6SSatish Balay } 176457b952d6SSatish Balay rowcount++; 176557b952d6SSatish Balay } 1766cee3aa6bSSatish Balay 176757b952d6SSatish Balay dlens[i] = dcount; 176857b952d6SSatish Balay odlens[i] = odcount; 1769cee3aa6bSSatish Balay 177057b952d6SSatish Balay /* zero out the mask elements we set */ 177157b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 177257b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 177357b952d6SSatish Balay } 1774cee3aa6bSSatish Balay 177557b952d6SSatish Balay /* create our matrix */ 1776537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1777537820f0SBarry Smith CHKERRQ(ierr); 177857b952d6SSatish Balay A = *newmat; 17796d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 178057b952d6SSatish Balay 178157b952d6SSatish Balay if (!rank) { 178257b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 178357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 178457b952d6SSatish Balay nz = procsnz[0]; 178557b952d6SSatish Balay vals = buf; 1786cee3aa6bSSatish Balay mycols = ibuf; 1787cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 178857b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1789cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1790537820f0SBarry Smith 179157b952d6SSatish Balay /* insert into matrix */ 179257b952d6SSatish Balay jj = rstart*bs; 179357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 179457b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 179557b952d6SSatish Balay mycols += locrowlens[i]; 179657b952d6SSatish Balay vals += locrowlens[i]; 179757b952d6SSatish Balay jj++; 179857b952d6SSatish Balay } 179957b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 180057b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 180157b952d6SSatish Balay nz = procsnz[i]; 180257b952d6SSatish Balay vals = buf; 180357b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 180457b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 180557b952d6SSatish Balay } 180657b952d6SSatish Balay /* the last proc */ 180757b952d6SSatish Balay if ( size != 1 ){ 180857b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1809cee3aa6bSSatish Balay vals = buf; 181057b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 181157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1812cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 181357b952d6SSatish Balay } 181457b952d6SSatish Balay PetscFree(procsnz); 181557b952d6SSatish Balay } 181657b952d6SSatish Balay else { 181757b952d6SSatish Balay /* receive numeric values */ 181857b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 181957b952d6SSatish Balay 182057b952d6SSatish Balay /* receive message of values*/ 182157b952d6SSatish Balay vals = buf; 1822cee3aa6bSSatish Balay mycols = ibuf; 182357b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 182457b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1825e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 182657b952d6SSatish Balay 182757b952d6SSatish Balay /* insert into matrix */ 182857b952d6SSatish Balay jj = rstart*bs; 1829cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 183057b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 183157b952d6SSatish Balay mycols += locrowlens[i]; 183257b952d6SSatish Balay vals += locrowlens[i]; 183357b952d6SSatish Balay jj++; 183457b952d6SSatish Balay } 183557b952d6SSatish Balay } 183657b952d6SSatish Balay PetscFree(locrowlens); 183757b952d6SSatish Balay PetscFree(buf); 183857b952d6SSatish Balay PetscFree(ibuf); 183957b952d6SSatish Balay PetscFree(rowners); 184057b952d6SSatish Balay PetscFree(dlens); 1841cee3aa6bSSatish Balay PetscFree(mask); 18426d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 18436d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 184457b952d6SSatish Balay return 0; 184557b952d6SSatish Balay } 184657b952d6SSatish Balay 184757b952d6SSatish Balay 1848