179bdfe76SSatish Balay #ifndef lint 2*f5e9677aSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.50 1997/01/14 17:15:35 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 243b2fbd54SBarry Smith 25537820f0SBarry Smith /* 26537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2957b952d6SSatish Balay length of colmap equals the global matrix length. 3057b952d6SSatish Balay */ 315615d1e5SSatish Balay #undef __FUNC__ 325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3457b952d6SSatish Balay { 3557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 37928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3857b952d6SSatish Balay 3957b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 4057b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4157b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 42928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4357b952d6SSatish Balay return 0; 4457b952d6SSatish Balay } 4557b952d6SSatish Balay 465615d1e5SSatish Balay #undef __FUNC__ 475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 5057b952d6SSatish Balay { 513b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5257b952d6SSatish Balay int ierr; 533b2fbd54SBarry Smith if (aij->size == 1) { 543b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 55e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 563b2fbd54SBarry Smith return 0; 573b2fbd54SBarry Smith } 583b2fbd54SBarry Smith 595615d1e5SSatish Balay #undef __FUNC__ 605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 623b2fbd54SBarry Smith PetscTruth *done) 633b2fbd54SBarry Smith { 643b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 653b2fbd54SBarry Smith int ierr; 663b2fbd54SBarry Smith if (aij->size == 1) { 673b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 68e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6957b952d6SSatish Balay return 0; 7057b952d6SSatish Balay } 7180c1aa95SSatish Balay #define CHUNKSIZE 10 7280c1aa95SSatish Balay 73*f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 7480c1aa95SSatish Balay { \ 7580c1aa95SSatish Balay \ 7680c1aa95SSatish Balay brow = row/bs; \ 7780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 7880c1aa95SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; \ 7980c1aa95SSatish Balay bcol = col/bs; \ 8080c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 8180c1aa95SSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 8280c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 8380c1aa95SSatish Balay if (rp[_i] == bcol) { \ 8480c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 85eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 86eada6651SSatish Balay else *bap = value; \ 8780c1aa95SSatish Balay goto _noinsert; \ 8880c1aa95SSatish Balay } \ 8980c1aa95SSatish Balay } \ 900520107fSSatish Balay if (a->nonew) goto _noinsert; \ 9180c1aa95SSatish Balay if (nrow >= rmax) { \ 9280c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 9380c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 9480c1aa95SSatish Balay Scalar *new_a; \ 9580c1aa95SSatish Balay \ 9680c1aa95SSatish Balay /* malloc new storage space */ \ 9780c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 9880c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 9980c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 10080c1aa95SSatish Balay new_i = new_j + new_nz; \ 10180c1aa95SSatish Balay \ 10280c1aa95SSatish Balay /* copy over old data into new slots */ \ 10380c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 10480c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 10580c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 10680c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 10780c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 10880c1aa95SSatish Balay len*sizeof(int)); \ 10980c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 11080c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 11180c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 11280c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 11380c1aa95SSatish Balay /* free up old matrix storage */ \ 11480c1aa95SSatish Balay PetscFree(a->a); \ 11580c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 11680c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 11780c1aa95SSatish Balay a->singlemalloc = 1; \ 11880c1aa95SSatish Balay \ 11980c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 12080c1aa95SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; \ 12180c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 12280c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 12380c1aa95SSatish Balay a->reallocs++; \ 12480c1aa95SSatish Balay a->nz++; \ 12580c1aa95SSatish Balay } \ 12680c1aa95SSatish Balay N = nrow++ - 1; \ 12780c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 12880c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 12980c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 13080c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 13180c1aa95SSatish Balay } \ 13280c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 13380c1aa95SSatish Balay rp[_i] = bcol; \ 13480c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 13580c1aa95SSatish Balay _noinsert:; \ 13680c1aa95SSatish Balay ailen[brow] = nrow; \ 13780c1aa95SSatish Balay } 13857b952d6SSatish Balay 139639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1405615d1e5SSatish Balay #undef __FUNC__ 1415615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 14257b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 14357b952d6SSatish Balay { 14457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 14557b952d6SSatish Balay Scalar value; 1464fa0d573SSatish Balay int ierr,i,j,row,col; 1474fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1484fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1494fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 15057b952d6SSatish Balay 151eada6651SSatish Balay /* Some Variables required in the macro */ 15280c1aa95SSatish Balay Mat A = baij->A; 15380c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 15480c1aa95SSatish Balay int *rp,ii,nrow,_i,rmax,N; 15580c1aa95SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 15680c1aa95SSatish Balay int *aj=a->j,brow,bcol; 15780c1aa95SSatish Balay int ridx,cidx,bs2=a->bs2; 15880c1aa95SSatish Balay Scalar *ap,*aa=a->a,*bap; 15980c1aa95SSatish Balay 1602c3acbe9SBarry Smith #if defined(PETSC_BOPT_g) 16157b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 162e3372554SBarry Smith SETERRQ(1,0,"Cannot mix inserts and adds"); 16357b952d6SSatish Balay } 1642c3acbe9SBarry Smith #endif 16557b952d6SSatish Balay baij->insertmode = addv; 16657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 167639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 168e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 169e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 170639f9d9dSBarry Smith #endif 17157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 17257b952d6SSatish Balay row = im[i] - rstart_orig; 17357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 17457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 17557b952d6SSatish Balay col = in[j] - cstart_orig; 17657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 177*f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 17880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 17957b952d6SSatish Balay } 180639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 181e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 182e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 183639f9d9dSBarry Smith #endif 18457b952d6SSatish Balay else { 18557b952d6SSatish Balay if (mat->was_assembled) { 186905e6a2fSBarry Smith if (!baij->colmap) { 187905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 188905e6a2fSBarry Smith } 189905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 19057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 19157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 19257b952d6SSatish Balay col = in[j]; 19357b952d6SSatish Balay } 19457b952d6SSatish Balay } 19557b952d6SSatish Balay else col = in[j]; 19657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 197639f9d9dSBarry Smith ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 19857b952d6SSatish Balay } 19957b952d6SSatish Balay } 20057b952d6SSatish Balay } 20157b952d6SSatish Balay else { 20290f02eecSBarry Smith if (roworiented && !baij->donotstash) { 20357b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 20457b952d6SSatish Balay } 20557b952d6SSatish Balay else { 20690f02eecSBarry Smith if (!baij->donotstash) { 20757b952d6SSatish Balay row = im[i]; 20857b952d6SSatish Balay for ( j=0; j<n; j++ ) { 20957b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 21057b952d6SSatish Balay } 21157b952d6SSatish Balay } 21257b952d6SSatish Balay } 21357b952d6SSatish Balay } 21490f02eecSBarry Smith } 21557b952d6SSatish Balay return 0; 21657b952d6SSatish Balay } 21757b952d6SSatish Balay 2185615d1e5SSatish Balay #undef __FUNC__ 2195615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 220d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 221d6de1c52SSatish Balay { 222d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 223d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 224d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 225d6de1c52SSatish Balay 226d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 227e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 228e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 229d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 230d6de1c52SSatish Balay row = idxm[i] - bsrstart; 231d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 232e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 233e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 234d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 235d6de1c52SSatish Balay col = idxn[j] - bscstart; 236d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 237d6de1c52SSatish Balay } 238d6de1c52SSatish Balay else { 239905e6a2fSBarry Smith if (!baij->colmap) { 240905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 241905e6a2fSBarry Smith } 242e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 243dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 244d9d09a02SSatish Balay else { 245dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 246d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 247d6de1c52SSatish Balay } 248d6de1c52SSatish Balay } 249d6de1c52SSatish Balay } 250d9d09a02SSatish Balay } 251d6de1c52SSatish Balay else { 252e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 253d6de1c52SSatish Balay } 254d6de1c52SSatish Balay } 255d6de1c52SSatish Balay return 0; 256d6de1c52SSatish Balay } 257d6de1c52SSatish Balay 2585615d1e5SSatish Balay #undef __FUNC__ 2595615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 260d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 261d6de1c52SSatish Balay { 262d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 263d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 264acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 265d6de1c52SSatish Balay double sum = 0.0; 266d6de1c52SSatish Balay Scalar *v; 267d6de1c52SSatish Balay 268d6de1c52SSatish Balay if (baij->size == 1) { 269d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 270d6de1c52SSatish Balay } else { 271d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 272d6de1c52SSatish Balay v = amat->a; 273d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 274d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 275d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 276d6de1c52SSatish Balay #else 277d6de1c52SSatish Balay sum += (*v)*(*v); v++; 278d6de1c52SSatish Balay #endif 279d6de1c52SSatish Balay } 280d6de1c52SSatish Balay v = bmat->a; 281d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 282d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 283d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 284d6de1c52SSatish Balay #else 285d6de1c52SSatish Balay sum += (*v)*(*v); v++; 286d6de1c52SSatish Balay #endif 287d6de1c52SSatish Balay } 288d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 289d6de1c52SSatish Balay *norm = sqrt(*norm); 290d6de1c52SSatish Balay } 291acdf5bf4SSatish Balay else 292e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 293d6de1c52SSatish Balay } 294d6de1c52SSatish Balay return 0; 295d6de1c52SSatish Balay } 29657b952d6SSatish Balay 2975615d1e5SSatish Balay #undef __FUNC__ 2985615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 29957b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 30057b952d6SSatish Balay { 30157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 30257b952d6SSatish Balay MPI_Comm comm = mat->comm; 30357b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 30457b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 30557b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 30657b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 30757b952d6SSatish Balay InsertMode addv; 30857b952d6SSatish Balay Scalar *rvalues,*svalues; 30957b952d6SSatish Balay 31057b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 31157b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 31257b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 313e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 31457b952d6SSatish Balay } 31557b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 31657b952d6SSatish Balay 31757b952d6SSatish Balay /* first count number of contributors to each processor */ 31857b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 31957b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 32057b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 32157b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 32257b952d6SSatish Balay idx = baij->stash.idx[i]; 32357b952d6SSatish Balay for ( j=0; j<size; j++ ) { 32457b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 32557b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 32657b952d6SSatish Balay } 32757b952d6SSatish Balay } 32857b952d6SSatish Balay } 32957b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 33057b952d6SSatish Balay 33157b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 33257b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 33357b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 33457b952d6SSatish Balay nreceives = work[rank]; 33557b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 33657b952d6SSatish Balay nmax = work[rank]; 33757b952d6SSatish Balay PetscFree(work); 33857b952d6SSatish Balay 33957b952d6SSatish Balay /* post receives: 34057b952d6SSatish Balay 1) each message will consist of ordered pairs 34157b952d6SSatish Balay (global index,value) we store the global index as a double 34257b952d6SSatish Balay to simplify the message passing. 34357b952d6SSatish Balay 2) since we don't know how long each individual message is we 34457b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 34557b952d6SSatish Balay this is a lot of wasted space. 34657b952d6SSatish Balay 34757b952d6SSatish Balay 34857b952d6SSatish Balay This could be done better. 34957b952d6SSatish Balay */ 35057b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 35157b952d6SSatish Balay CHKPTRQ(rvalues); 35257b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 35357b952d6SSatish Balay CHKPTRQ(recv_waits); 35457b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 35557b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 35657b952d6SSatish Balay comm,recv_waits+i); 35757b952d6SSatish Balay } 35857b952d6SSatish Balay 35957b952d6SSatish Balay /* do sends: 36057b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 36157b952d6SSatish Balay the ith processor 36257b952d6SSatish Balay */ 36357b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 36457b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36557b952d6SSatish Balay CHKPTRQ(send_waits); 36657b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 36757b952d6SSatish Balay starts[0] = 0; 36857b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 36957b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 37057b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 37157b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 37257b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 37357b952d6SSatish Balay } 37457b952d6SSatish Balay PetscFree(owner); 37557b952d6SSatish Balay starts[0] = 0; 37657b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 37757b952d6SSatish Balay count = 0; 37857b952d6SSatish Balay for ( i=0; i<size; i++ ) { 37957b952d6SSatish Balay if (procs[i]) { 38057b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 38157b952d6SSatish Balay comm,send_waits+count++); 38257b952d6SSatish Balay } 38357b952d6SSatish Balay } 38457b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 38557b952d6SSatish Balay 38657b952d6SSatish Balay /* Free cache space */ 387d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 38857b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 38957b952d6SSatish Balay 39057b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 39157b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 39257b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 39357b952d6SSatish Balay baij->rmax = nmax; 39457b952d6SSatish Balay 39557b952d6SSatish Balay return 0; 39657b952d6SSatish Balay } 39757b952d6SSatish Balay 39857b952d6SSatish Balay 3995615d1e5SSatish Balay #undef __FUNC__ 4005615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 40157b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 40257b952d6SSatish Balay { 40357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 40457b952d6SSatish Balay MPI_Status *send_status,recv_status; 40557b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 40657b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 40757b952d6SSatish Balay Scalar *values,val; 40857b952d6SSatish Balay InsertMode addv = baij->insertmode; 40957b952d6SSatish Balay 41057b952d6SSatish Balay /* wait on receives */ 41157b952d6SSatish Balay while (count) { 41257b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 41357b952d6SSatish Balay /* unpack receives into our local space */ 41457b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 41557b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 41657b952d6SSatish Balay n = n/3; 41757b952d6SSatish Balay for ( i=0; i<n; i++ ) { 41857b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 41957b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 42057b952d6SSatish Balay val = values[3*i+2]; 42157b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 42257b952d6SSatish Balay col -= baij->cstart*bs; 42357b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 42457b952d6SSatish Balay } 42557b952d6SSatish Balay else { 42657b952d6SSatish Balay if (mat->was_assembled) { 427905e6a2fSBarry Smith if (!baij->colmap) { 428905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 429905e6a2fSBarry Smith } 430905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 43157b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 43257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 43357b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 43457b952d6SSatish Balay } 43557b952d6SSatish Balay } 43657b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 43757b952d6SSatish Balay } 43857b952d6SSatish Balay } 43957b952d6SSatish Balay count--; 44057b952d6SSatish Balay } 44157b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 44257b952d6SSatish Balay 44357b952d6SSatish Balay /* wait on sends */ 44457b952d6SSatish Balay if (baij->nsends) { 44557b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 44657b952d6SSatish Balay CHKPTRQ(send_status); 44757b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 44857b952d6SSatish Balay PetscFree(send_status); 44957b952d6SSatish Balay } 45057b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 45157b952d6SSatish Balay 45257b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 45357b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 45457b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 45557b952d6SSatish Balay 45657b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 45757b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 45857b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 45957b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 46057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 46157b952d6SSatish Balay } 46257b952d6SSatish Balay 4636d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 46457b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 46557b952d6SSatish Balay } 46657b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 46757b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 46857b952d6SSatish Balay 46957b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 47057b952d6SSatish Balay return 0; 47157b952d6SSatish Balay } 47257b952d6SSatish Balay 4735615d1e5SSatish Balay #undef __FUNC__ 4745615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 47557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 47657b952d6SSatish Balay { 47757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 47857b952d6SSatish Balay int ierr; 47957b952d6SSatish Balay 48057b952d6SSatish Balay if (baij->size == 1) { 48157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 48257b952d6SSatish Balay } 483e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 48457b952d6SSatish Balay return 0; 48557b952d6SSatish Balay } 48657b952d6SSatish Balay 4875615d1e5SSatish Balay #undef __FUNC__ 4885615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 48957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 49057b952d6SSatish Balay { 49157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 492cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 49357b952d6SSatish Balay FILE *fd; 49457b952d6SSatish Balay ViewerType vtype; 49557b952d6SSatish Balay 49657b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 49757b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 49857b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 499639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 5004e220ebcSLois Curfman McInnes MatInfo info; 50157b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 50257b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 5034e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 50457b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 50557b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 5064e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 5074e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 5084e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 5094e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 5104e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 5114e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 51257b952d6SSatish Balay fflush(fd); 51357b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 51457b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 51557b952d6SSatish Balay return 0; 51657b952d6SSatish Balay } 517639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 518bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 51957b952d6SSatish Balay return 0; 52057b952d6SSatish Balay } 52157b952d6SSatish Balay } 52257b952d6SSatish Balay 52357b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 52457b952d6SSatish Balay Draw draw; 52557b952d6SSatish Balay PetscTruth isnull; 52657b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 52757b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 52857b952d6SSatish Balay } 52957b952d6SSatish Balay 53057b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 53157b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 53257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 53357b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 53457b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 53557b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 53657b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 53757b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 53857b952d6SSatish Balay fflush(fd); 53957b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 54057b952d6SSatish Balay } 54157b952d6SSatish Balay else { 54257b952d6SSatish Balay int size = baij->size; 54357b952d6SSatish Balay rank = baij->rank; 54457b952d6SSatish Balay if (size == 1) { 54557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 54657b952d6SSatish Balay } 54757b952d6SSatish Balay else { 54857b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 54957b952d6SSatish Balay Mat A; 55057b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 55157b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 55257b952d6SSatish Balay int mbs=baij->mbs; 55357b952d6SSatish Balay Scalar *a; 55457b952d6SSatish Balay 55557b952d6SSatish Balay if (!rank) { 556cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 55757b952d6SSatish Balay CHKERRQ(ierr); 55857b952d6SSatish Balay } 55957b952d6SSatish Balay else { 560cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 56157b952d6SSatish Balay CHKERRQ(ierr); 56257b952d6SSatish Balay } 56357b952d6SSatish Balay PLogObjectParent(mat,A); 56457b952d6SSatish Balay 56557b952d6SSatish Balay /* copy over the A part */ 56657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 56757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 56857b952d6SSatish Balay row = baij->rstart; 56957b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 57057b952d6SSatish Balay 57157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 57257b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 57357b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 57457b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 57557b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 57657b952d6SSatish Balay for (k=0; k<bs; k++ ) { 577cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 578cee3aa6bSSatish Balay col++; a += bs; 57957b952d6SSatish Balay } 58057b952d6SSatish Balay } 58157b952d6SSatish Balay } 58257b952d6SSatish Balay /* copy over the B part */ 58357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 58457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 58557b952d6SSatish Balay row = baij->rstart*bs; 58657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 58757b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 58857b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 58957b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 59057b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 59157b952d6SSatish Balay for (k=0; k<bs; k++ ) { 592cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 593cee3aa6bSSatish Balay col++; a += bs; 59457b952d6SSatish Balay } 59557b952d6SSatish Balay } 59657b952d6SSatish Balay } 59757b952d6SSatish Balay PetscFree(rvals); 5986d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5996d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 60057b952d6SSatish Balay if (!rank) { 60157b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 60257b952d6SSatish Balay } 60357b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 60457b952d6SSatish Balay } 60557b952d6SSatish Balay } 60657b952d6SSatish Balay return 0; 60757b952d6SSatish Balay } 60857b952d6SSatish Balay 60957b952d6SSatish Balay 61057b952d6SSatish Balay 6115615d1e5SSatish Balay #undef __FUNC__ 6125615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 61357b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 61457b952d6SSatish Balay { 61557b952d6SSatish Balay Mat mat = (Mat) obj; 61657b952d6SSatish Balay int ierr; 61757b952d6SSatish Balay ViewerType vtype; 61857b952d6SSatish Balay 61957b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 62057b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 62157b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 62257b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 62357b952d6SSatish Balay } 62457b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 62557b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 62657b952d6SSatish Balay } 62757b952d6SSatish Balay return 0; 62857b952d6SSatish Balay } 62957b952d6SSatish Balay 6305615d1e5SSatish Balay #undef __FUNC__ 6315615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 63279bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 63379bdfe76SSatish Balay { 63479bdfe76SSatish Balay Mat mat = (Mat) obj; 63579bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 63679bdfe76SSatish Balay int ierr; 63779bdfe76SSatish Balay 63879bdfe76SSatish Balay #if defined(PETSC_LOG) 63979bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 64079bdfe76SSatish Balay #endif 64179bdfe76SSatish Balay 64279bdfe76SSatish Balay PetscFree(baij->rowners); 64379bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 64479bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 64579bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 64679bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 64779bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 64879bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 64979bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 65079bdfe76SSatish Balay PetscFree(baij); 65190f02eecSBarry Smith if (mat->mapping) { 65290f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 65390f02eecSBarry Smith } 65479bdfe76SSatish Balay PLogObjectDestroy(mat); 65579bdfe76SSatish Balay PetscHeaderDestroy(mat); 65679bdfe76SSatish Balay return 0; 65779bdfe76SSatish Balay } 65879bdfe76SSatish Balay 6595615d1e5SSatish Balay #undef __FUNC__ 6605615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 661cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 662cee3aa6bSSatish Balay { 663cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 66447b4a8eaSLois Curfman McInnes int ierr, nt; 665cee3aa6bSSatish Balay 666c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 66747b4a8eaSLois Curfman McInnes if (nt != a->n) { 668e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and xx"); 66947b4a8eaSLois Curfman McInnes } 670c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 67147b4a8eaSLois Curfman McInnes if (nt != a->m) { 672e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 67347b4a8eaSLois Curfman McInnes } 67443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 675cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 67643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 677cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 67843a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 679cee3aa6bSSatish Balay return 0; 680cee3aa6bSSatish Balay } 681cee3aa6bSSatish Balay 6825615d1e5SSatish Balay #undef __FUNC__ 6835615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 684cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 685cee3aa6bSSatish Balay { 686cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 687cee3aa6bSSatish Balay int ierr; 68843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 689cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 69043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 691cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 692cee3aa6bSSatish Balay return 0; 693cee3aa6bSSatish Balay } 694cee3aa6bSSatish Balay 6955615d1e5SSatish Balay #undef __FUNC__ 6965615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 697cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 698cee3aa6bSSatish Balay { 699cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 700cee3aa6bSSatish Balay int ierr; 701cee3aa6bSSatish Balay 702cee3aa6bSSatish Balay /* do nondiagonal part */ 703cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 704cee3aa6bSSatish Balay /* send it on its way */ 705537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 706cee3aa6bSSatish Balay /* do local part */ 707cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 708cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 709cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 710cee3aa6bSSatish Balay /* but is not perhaps always true. */ 711639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 712cee3aa6bSSatish Balay return 0; 713cee3aa6bSSatish Balay } 714cee3aa6bSSatish Balay 7155615d1e5SSatish Balay #undef __FUNC__ 7165615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 717cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 718cee3aa6bSSatish Balay { 719cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 720cee3aa6bSSatish Balay int ierr; 721cee3aa6bSSatish Balay 722cee3aa6bSSatish Balay /* do nondiagonal part */ 723cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 724cee3aa6bSSatish Balay /* send it on its way */ 725537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 726cee3aa6bSSatish Balay /* do local part */ 727cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 728cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 729cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 730cee3aa6bSSatish Balay /* but is not perhaps always true. */ 731537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 732cee3aa6bSSatish Balay return 0; 733cee3aa6bSSatish Balay } 734cee3aa6bSSatish Balay 735cee3aa6bSSatish Balay /* 736cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 737cee3aa6bSSatish Balay diagonal block 738cee3aa6bSSatish Balay */ 7395615d1e5SSatish Balay #undef __FUNC__ 7405615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 741cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 742cee3aa6bSSatish Balay { 743cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 744cee3aa6bSSatish Balay if (a->M != a->N) 745e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 746cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 747cee3aa6bSSatish Balay } 748cee3aa6bSSatish Balay 7495615d1e5SSatish Balay #undef __FUNC__ 7505615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 751cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 752cee3aa6bSSatish Balay { 753cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 754cee3aa6bSSatish Balay int ierr; 755cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 756cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 757cee3aa6bSSatish Balay return 0; 758cee3aa6bSSatish Balay } 759026e39d0SSatish Balay 7605615d1e5SSatish Balay #undef __FUNC__ 7615615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 76257b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 76357b952d6SSatish Balay { 76457b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 76557b952d6SSatish Balay *m = mat->M; *n = mat->N; 76657b952d6SSatish Balay return 0; 76757b952d6SSatish Balay } 76857b952d6SSatish Balay 7695615d1e5SSatish Balay #undef __FUNC__ 7705615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 77157b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 77257b952d6SSatish Balay { 77357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 77457b952d6SSatish Balay *m = mat->m; *n = mat->N; 77557b952d6SSatish Balay return 0; 77657b952d6SSatish Balay } 77757b952d6SSatish Balay 7785615d1e5SSatish Balay #undef __FUNC__ 7795615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 78057b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 78157b952d6SSatish Balay { 78257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 78357b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 78457b952d6SSatish Balay return 0; 78557b952d6SSatish Balay } 78657b952d6SSatish Balay 787acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 788acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 789acdf5bf4SSatish Balay 7905615d1e5SSatish Balay #undef __FUNC__ 7915615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 792acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 793acdf5bf4SSatish Balay { 794acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 795acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 796acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 797d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 798d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 799acdf5bf4SSatish Balay 800e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 801acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 802acdf5bf4SSatish Balay 803acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 804acdf5bf4SSatish Balay /* 805acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 806acdf5bf4SSatish Balay */ 807acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 808bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 809bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 810acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 811acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 812acdf5bf4SSatish Balay } 813acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 814acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 815acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 816acdf5bf4SSatish Balay } 817acdf5bf4SSatish Balay 818acdf5bf4SSatish Balay 819e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 820d9d09a02SSatish Balay lrow = row - brstart; 821acdf5bf4SSatish Balay 822acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 823acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 824acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 825acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 826acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 827acdf5bf4SSatish Balay nztot = nzA + nzB; 828acdf5bf4SSatish Balay 829acdf5bf4SSatish Balay cmap = mat->garray; 830acdf5bf4SSatish Balay if (v || idx) { 831acdf5bf4SSatish Balay if (nztot) { 832acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 833acdf5bf4SSatish Balay int imark = -1; 834acdf5bf4SSatish Balay if (v) { 835acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 836acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 837d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 838acdf5bf4SSatish Balay else break; 839acdf5bf4SSatish Balay } 840acdf5bf4SSatish Balay imark = i; 841acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 842acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 843acdf5bf4SSatish Balay } 844acdf5bf4SSatish Balay if (idx) { 845acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 846acdf5bf4SSatish Balay if (imark > -1) { 847acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 848bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 849acdf5bf4SSatish Balay } 850acdf5bf4SSatish Balay } else { 851acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 852d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 853d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 854acdf5bf4SSatish Balay else break; 855acdf5bf4SSatish Balay } 856acdf5bf4SSatish Balay imark = i; 857acdf5bf4SSatish Balay } 858d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 859d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 860acdf5bf4SSatish Balay } 861acdf5bf4SSatish Balay } 862d212a18eSSatish Balay else { 863d212a18eSSatish Balay if (idx) *idx = 0; 864d212a18eSSatish Balay if (v) *v = 0; 865d212a18eSSatish Balay } 866acdf5bf4SSatish Balay } 867acdf5bf4SSatish Balay *nz = nztot; 868acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 869acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 870acdf5bf4SSatish Balay return 0; 871acdf5bf4SSatish Balay } 872acdf5bf4SSatish Balay 8735615d1e5SSatish Balay #undef __FUNC__ 8745615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 875acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 876acdf5bf4SSatish Balay { 877acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 878acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 879e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 880acdf5bf4SSatish Balay } 881acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 882acdf5bf4SSatish Balay return 0; 883acdf5bf4SSatish Balay } 884acdf5bf4SSatish Balay 8855615d1e5SSatish Balay #undef __FUNC__ 8865615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 8875a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 8885a838052SSatish Balay { 8895a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 8905a838052SSatish Balay *bs = baij->bs; 8915a838052SSatish Balay return 0; 8925a838052SSatish Balay } 8935a838052SSatish Balay 8945615d1e5SSatish Balay #undef __FUNC__ 8955615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 89658667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A) 89758667388SSatish Balay { 89858667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 89958667388SSatish Balay int ierr; 90058667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 90158667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 90258667388SSatish Balay return 0; 90358667388SSatish Balay } 9040ac07820SSatish Balay 9055615d1e5SSatish Balay #undef __FUNC__ 9065615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 9074e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 9080ac07820SSatish Balay { 9094e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 9104e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 9117d57db60SLois Curfman McInnes int ierr; 9127d57db60SLois Curfman McInnes double isend[5], irecv[5]; 9130ac07820SSatish Balay 9144e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 9154e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 9164e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 9174e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 9184e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 9194e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 9204e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 9214e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 9224e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 9230ac07820SSatish Balay if (flag == MAT_LOCAL) { 9244e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 9254e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 9264e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 9274e220ebcSLois Curfman McInnes info->memory = isend[3]; 9284e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 9290ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 9300ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm); 9314e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9324e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9334e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9344e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9354e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 9360ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 9370ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm); 9384e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9394e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9404e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9414e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9424e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 9430ac07820SSatish Balay } 9444e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 9454e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9464e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9470ac07820SSatish Balay return 0; 9480ac07820SSatish Balay } 9490ac07820SSatish Balay 9505615d1e5SSatish Balay #undef __FUNC__ 9515615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 95258667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op) 95358667388SSatish Balay { 95458667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 95558667388SSatish Balay 95658667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 95758667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 9586da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 959b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 960b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 961b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 962b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 963aeafbbfcSLois Curfman McInnes a->roworiented = 1; 96458667388SSatish Balay MatSetOption(a->A,op); 96558667388SSatish Balay MatSetOption(a->B,op); 966b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 9676da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 96858667388SSatish Balay op == MAT_SYMMETRIC || 96958667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 97058667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 97158667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 97258667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 97358667388SSatish Balay a->roworiented = 0; 97458667388SSatish Balay MatSetOption(a->A,op); 97558667388SSatish Balay MatSetOption(a->B,op); 97690f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 97790f02eecSBarry Smith a->donotstash = 1; 97890f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 979e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 98058667388SSatish Balay else 981e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 98258667388SSatish Balay return 0; 98358667388SSatish Balay } 98458667388SSatish Balay 9855615d1e5SSatish Balay #undef __FUNC__ 9865615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 9870ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 9880ac07820SSatish Balay { 9890ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 9900ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 9910ac07820SSatish Balay Mat B; 9920ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 9930ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 9940ac07820SSatish Balay Scalar *a; 9950ac07820SSatish Balay 9960ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 997e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 9980ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 9990ac07820SSatish Balay CHKERRQ(ierr); 10000ac07820SSatish Balay 10010ac07820SSatish Balay /* copy over the A part */ 10020ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 10030ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 10040ac07820SSatish Balay row = baij->rstart; 10050ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 10060ac07820SSatish Balay 10070ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 10080ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 10090ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 10100ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 10110ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 10120ac07820SSatish Balay for (k=0; k<bs; k++ ) { 10130ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 10140ac07820SSatish Balay col++; a += bs; 10150ac07820SSatish Balay } 10160ac07820SSatish Balay } 10170ac07820SSatish Balay } 10180ac07820SSatish Balay /* copy over the B part */ 10190ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 10200ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 10210ac07820SSatish Balay row = baij->rstart*bs; 10220ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 10230ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 10240ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 10250ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 10260ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 10270ac07820SSatish Balay for (k=0; k<bs; k++ ) { 10280ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 10290ac07820SSatish Balay col++; a += bs; 10300ac07820SSatish Balay } 10310ac07820SSatish Balay } 10320ac07820SSatish Balay } 10330ac07820SSatish Balay PetscFree(rvals); 10340ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10350ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10360ac07820SSatish Balay 10370ac07820SSatish Balay if (matout != PETSC_NULL) { 10380ac07820SSatish Balay *matout = B; 10390ac07820SSatish Balay } else { 10400ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 10410ac07820SSatish Balay PetscFree(baij->rowners); 10420ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 10430ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 10440ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 10450ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 10460ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 10470ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 10480ac07820SSatish Balay PetscFree(baij); 10490ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 10500ac07820SSatish Balay PetscHeaderDestroy(B); 10510ac07820SSatish Balay } 10520ac07820SSatish Balay return 0; 10530ac07820SSatish Balay } 10540e95ebc0SSatish Balay 10555615d1e5SSatish Balay #undef __FUNC__ 10565615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 10570e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 10580e95ebc0SSatish Balay { 10590e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 10600e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 10610e95ebc0SSatish Balay int ierr,s1,s2,s3; 10620e95ebc0SSatish Balay 10630e95ebc0SSatish Balay if (ll) { 10640e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 10650e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1066e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 10670e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 10680e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 10690e95ebc0SSatish Balay } 1070e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 10710e95ebc0SSatish Balay return 0; 10720e95ebc0SSatish Balay } 10730e95ebc0SSatish Balay 10740ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 10750ac07820SSatish Balay matrix is square and the column and row owerships are identical. 10760ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 10770ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 10780ac07820SSatish Balay routine. 10790ac07820SSatish Balay */ 10805615d1e5SSatish Balay #undef __FUNC__ 10815615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 10820ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 10830ac07820SSatish Balay { 10840ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 10850ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 10860ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 10870ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 10880ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 10890ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 10900ac07820SSatish Balay MPI_Comm comm = A->comm; 10910ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 10920ac07820SSatish Balay MPI_Status recv_status,*send_status; 10930ac07820SSatish Balay IS istmp; 10940ac07820SSatish Balay 10950ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 10960ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 10970ac07820SSatish Balay 10980ac07820SSatish Balay /* first count number of contributors to each processor */ 10990ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 11000ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 11010ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 11020ac07820SSatish Balay for ( i=0; i<N; i++ ) { 11030ac07820SSatish Balay idx = rows[i]; 11040ac07820SSatish Balay found = 0; 11050ac07820SSatish Balay for ( j=0; j<size; j++ ) { 11060ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 11070ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 11080ac07820SSatish Balay } 11090ac07820SSatish Balay } 1110e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 11110ac07820SSatish Balay } 11120ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 11130ac07820SSatish Balay 11140ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 11150ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 11160ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 11170ac07820SSatish Balay nrecvs = work[rank]; 11180ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 11190ac07820SSatish Balay nmax = work[rank]; 11200ac07820SSatish Balay PetscFree(work); 11210ac07820SSatish Balay 11220ac07820SSatish Balay /* post receives: */ 11230ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 11240ac07820SSatish Balay CHKPTRQ(rvalues); 11250ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 11260ac07820SSatish Balay CHKPTRQ(recv_waits); 11270ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 11280ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 11290ac07820SSatish Balay } 11300ac07820SSatish Balay 11310ac07820SSatish Balay /* do sends: 11320ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 11330ac07820SSatish Balay the ith processor 11340ac07820SSatish Balay */ 11350ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 11360ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 11370ac07820SSatish Balay CHKPTRQ(send_waits); 11380ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 11390ac07820SSatish Balay starts[0] = 0; 11400ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 11410ac07820SSatish Balay for ( i=0; i<N; i++ ) { 11420ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 11430ac07820SSatish Balay } 11440ac07820SSatish Balay ISRestoreIndices(is,&rows); 11450ac07820SSatish Balay 11460ac07820SSatish Balay starts[0] = 0; 11470ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 11480ac07820SSatish Balay count = 0; 11490ac07820SSatish Balay for ( i=0; i<size; i++ ) { 11500ac07820SSatish Balay if (procs[i]) { 11510ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 11520ac07820SSatish Balay } 11530ac07820SSatish Balay } 11540ac07820SSatish Balay PetscFree(starts); 11550ac07820SSatish Balay 11560ac07820SSatish Balay base = owners[rank]*bs; 11570ac07820SSatish Balay 11580ac07820SSatish Balay /* wait on receives */ 11590ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 11600ac07820SSatish Balay source = lens + nrecvs; 11610ac07820SSatish Balay count = nrecvs; slen = 0; 11620ac07820SSatish Balay while (count) { 11630ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 11640ac07820SSatish Balay /* unpack receives into our local space */ 11650ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 11660ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 11670ac07820SSatish Balay lens[imdex] = n; 11680ac07820SSatish Balay slen += n; 11690ac07820SSatish Balay count--; 11700ac07820SSatish Balay } 11710ac07820SSatish Balay PetscFree(recv_waits); 11720ac07820SSatish Balay 11730ac07820SSatish Balay /* move the data into the send scatter */ 11740ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 11750ac07820SSatish Balay count = 0; 11760ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 11770ac07820SSatish Balay values = rvalues + i*nmax; 11780ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 11790ac07820SSatish Balay lrows[count++] = values[j] - base; 11800ac07820SSatish Balay } 11810ac07820SSatish Balay } 11820ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 11830ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 11840ac07820SSatish Balay 11850ac07820SSatish Balay /* actually zap the local rows */ 1186537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 11870ac07820SSatish Balay PLogObjectParent(A,istmp); 11880ac07820SSatish Balay PetscFree(lrows); 11890ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 11900ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 11910ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 11920ac07820SSatish Balay 11930ac07820SSatish Balay /* wait on sends */ 11940ac07820SSatish Balay if (nsends) { 11950ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 11960ac07820SSatish Balay CHKPTRQ(send_status); 11970ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 11980ac07820SSatish Balay PetscFree(send_status); 11990ac07820SSatish Balay } 12000ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 12010ac07820SSatish Balay 12020ac07820SSatish Balay return 0; 12030ac07820SSatish Balay } 1204ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 12055615d1e5SSatish Balay #undef __FUNC__ 12065615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1207ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A) 1208ba4ca20aSSatish Balay { 1209ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1210ba4ca20aSSatish Balay 1211ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1212ba4ca20aSSatish Balay else return 0; 1213ba4ca20aSSatish Balay } 12140ac07820SSatish Balay 12155615d1e5SSatish Balay #undef __FUNC__ 12165615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1217bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A) 1218bb5a7306SBarry Smith { 1219bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1220bb5a7306SBarry Smith int ierr; 1221bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1222bb5a7306SBarry Smith return 0; 1223bb5a7306SBarry Smith } 1224bb5a7306SBarry Smith 12250ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 12260ac07820SSatish Balay 122779bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 122879bdfe76SSatish Balay static struct _MatOps MatOps = { 1229bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 12304c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 12314c50302cSBarry Smith 0,0,0,0, 12320ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 12330e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 123458667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 12354c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 12364c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 12374c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1238905e6a2fSBarry Smith 0,0,0,MatConvertSameType_MPIBAIJ,0,0, 1239d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1240ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1241bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1242bb5a7306SBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ}; 124379bdfe76SSatish Balay 124479bdfe76SSatish Balay 12455615d1e5SSatish Balay #undef __FUNC__ 12465615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 124779bdfe76SSatish Balay /*@C 124879bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 124979bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 125079bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 125179bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 125279bdfe76SSatish Balay performance can be increased by more than a factor of 50. 125379bdfe76SSatish Balay 125479bdfe76SSatish Balay Input Parameters: 125579bdfe76SSatish Balay . comm - MPI communicator 125679bdfe76SSatish Balay . bs - size of blockk 125779bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 125892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 125992e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 126092e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 126192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 126292e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 126379bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 126492e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 126579bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 126679bdfe76SSatish Balay submatrix (same for all local rows) 126792e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 126892e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 126992e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 127092e8d321SLois Curfman McInnes it is zero. 127192e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 127279bdfe76SSatish Balay submatrix (same for all local rows). 127392e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 127492e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 127592e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 127679bdfe76SSatish Balay 127779bdfe76SSatish Balay Output Parameter: 127879bdfe76SSatish Balay . A - the matrix 127979bdfe76SSatish Balay 128079bdfe76SSatish Balay Notes: 128179bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 128279bdfe76SSatish Balay (possibly both). 128379bdfe76SSatish Balay 128479bdfe76SSatish Balay Storage Information: 128579bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 128679bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 128779bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 128879bdfe76SSatish Balay local matrix (a rectangular submatrix). 128979bdfe76SSatish Balay 129079bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 129179bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 129279bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 129379bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 129479bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 129579bdfe76SSatish Balay 129679bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 129779bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 129879bdfe76SSatish Balay 129979bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 130079bdfe76SSatish Balay $ ------------------- 130179bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 130279bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 130379bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 130479bdfe76SSatish Balay $ ------------------- 130579bdfe76SSatish Balay $ 130679bdfe76SSatish Balay 130779bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 130879bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 130979bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 131057b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 131179bdfe76SSatish Balay 131279bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 131379bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 131479bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 131592e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 131692e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 13176da5968aSLois Curfman McInnes matrices. 131879bdfe76SSatish Balay 131992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 132079bdfe76SSatish Balay 132179bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 132279bdfe76SSatish Balay @*/ 132379bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 132479bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 132579bdfe76SSatish Balay { 132679bdfe76SSatish Balay Mat B; 132779bdfe76SSatish Balay Mat_MPIBAIJ *b; 13284c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 132979bdfe76SSatish Balay 1330e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 133179bdfe76SSatish Balay *A = 0; 133279bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 133379bdfe76SSatish Balay PLogObjectCreate(B); 133479bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 133579bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 133679bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 13374c50302cSBarry Smith MPI_Comm_size(comm,&size); 13384c50302cSBarry Smith if (size == 1) { 13394c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 13404c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 13414c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 13424c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 13434c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 13444c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 13454c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 13464c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 13474c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 13484c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 13494c50302cSBarry Smith } 13504c50302cSBarry Smith 135179bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 135279bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 135390f02eecSBarry Smith B->mapping = 0; 135479bdfe76SSatish Balay B->factor = 0; 135579bdfe76SSatish Balay B->assembled = PETSC_FALSE; 135679bdfe76SSatish Balay 135779bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 135879bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 135979bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 136079bdfe76SSatish Balay 136179bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1362e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1363e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1364e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1365cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1366cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 136779bdfe76SSatish Balay 136879bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 136979bdfe76SSatish Balay work[0] = m; work[1] = n; 137079bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 137179bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 137279bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 137379bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 137479bdfe76SSatish Balay } 137579bdfe76SSatish Balay if (m == PETSC_DECIDE) { 137679bdfe76SSatish Balay Mbs = M/bs; 1377e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 137879bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 137979bdfe76SSatish Balay m = mbs*bs; 138079bdfe76SSatish Balay } 138179bdfe76SSatish Balay if (n == PETSC_DECIDE) { 138279bdfe76SSatish Balay Nbs = N/bs; 1383e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 138479bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 138579bdfe76SSatish Balay n = nbs*bs; 138679bdfe76SSatish Balay } 1387e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 138879bdfe76SSatish Balay 138979bdfe76SSatish Balay b->m = m; B->m = m; 139079bdfe76SSatish Balay b->n = n; B->n = n; 139179bdfe76SSatish Balay b->N = N; B->N = N; 139279bdfe76SSatish Balay b->M = M; B->M = M; 139379bdfe76SSatish Balay b->bs = bs; 139479bdfe76SSatish Balay b->bs2 = bs*bs; 139579bdfe76SSatish Balay b->mbs = mbs; 139679bdfe76SSatish Balay b->nbs = nbs; 139779bdfe76SSatish Balay b->Mbs = Mbs; 139879bdfe76SSatish Balay b->Nbs = Nbs; 139979bdfe76SSatish Balay 140079bdfe76SSatish Balay /* build local table of row and column ownerships */ 140179bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 140279bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 14030ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 140479bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 140579bdfe76SSatish Balay b->rowners[0] = 0; 140679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 140779bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 140879bdfe76SSatish Balay } 140979bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 141079bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 14114fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 14124fa0d573SSatish Balay b->rend_bs = b->rend * bs; 14134fa0d573SSatish Balay 141479bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 141579bdfe76SSatish Balay b->cowners[0] = 0; 141679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 141779bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 141879bdfe76SSatish Balay } 141979bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 142079bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 14214fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 14224fa0d573SSatish Balay b->cend_bs = b->cend * bs; 142379bdfe76SSatish Balay 142479bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 142579bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 142679bdfe76SSatish Balay PLogObjectParent(B,b->A); 142779bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 142879bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 142979bdfe76SSatish Balay PLogObjectParent(B,b->B); 143079bdfe76SSatish Balay 143179bdfe76SSatish Balay /* build cache for off array entries formed */ 143279bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 143390f02eecSBarry Smith b->donotstash = 0; 143479bdfe76SSatish Balay b->colmap = 0; 143579bdfe76SSatish Balay b->garray = 0; 143679bdfe76SSatish Balay b->roworiented = 1; 143779bdfe76SSatish Balay 143879bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 143979bdfe76SSatish Balay b->lvec = 0; 144079bdfe76SSatish Balay b->Mvctx = 0; 144179bdfe76SSatish Balay 144279bdfe76SSatish Balay /* stuff for MatGetRow() */ 144379bdfe76SSatish Balay b->rowindices = 0; 144479bdfe76SSatish Balay b->rowvalues = 0; 144579bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 144679bdfe76SSatish Balay 144779bdfe76SSatish Balay *A = B; 144879bdfe76SSatish Balay return 0; 144979bdfe76SSatish Balay } 1450026e39d0SSatish Balay 14515615d1e5SSatish Balay #undef __FUNC__ 14525615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 14530ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 14540ac07820SSatish Balay { 14550ac07820SSatish Balay Mat mat; 14560ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 14570ac07820SSatish Balay int ierr, len=0, flg; 14580ac07820SSatish Balay 14590ac07820SSatish Balay *newmat = 0; 14600ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 14610ac07820SSatish Balay PLogObjectCreate(mat); 14620ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 14630ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 14640ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 14650ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 14660ac07820SSatish Balay mat->factor = matin->factor; 14670ac07820SSatish Balay mat->assembled = PETSC_TRUE; 14680ac07820SSatish Balay 14690ac07820SSatish Balay a->m = mat->m = oldmat->m; 14700ac07820SSatish Balay a->n = mat->n = oldmat->n; 14710ac07820SSatish Balay a->M = mat->M = oldmat->M; 14720ac07820SSatish Balay a->N = mat->N = oldmat->N; 14730ac07820SSatish Balay 14740ac07820SSatish Balay a->bs = oldmat->bs; 14750ac07820SSatish Balay a->bs2 = oldmat->bs2; 14760ac07820SSatish Balay a->mbs = oldmat->mbs; 14770ac07820SSatish Balay a->nbs = oldmat->nbs; 14780ac07820SSatish Balay a->Mbs = oldmat->Mbs; 14790ac07820SSatish Balay a->Nbs = oldmat->Nbs; 14800ac07820SSatish Balay 14810ac07820SSatish Balay a->rstart = oldmat->rstart; 14820ac07820SSatish Balay a->rend = oldmat->rend; 14830ac07820SSatish Balay a->cstart = oldmat->cstart; 14840ac07820SSatish Balay a->cend = oldmat->cend; 14850ac07820SSatish Balay a->size = oldmat->size; 14860ac07820SSatish Balay a->rank = oldmat->rank; 14870ac07820SSatish Balay a->insertmode = NOT_SET_VALUES; 14880ac07820SSatish Balay a->rowvalues = 0; 14890ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 14900ac07820SSatish Balay 14910ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 14920ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 14930ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 14940ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 14950ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14960ac07820SSatish Balay if (oldmat->colmap) { 14970ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 14980ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 14990ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 15000ac07820SSatish Balay } else a->colmap = 0; 15010ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 15020ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 15030ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 15040ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 15050ac07820SSatish Balay } else a->garray = 0; 15060ac07820SSatish Balay 15070ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 15080ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 15090ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 15100ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 15110ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 15120ac07820SSatish Balay PLogObjectParent(mat,a->A); 15130ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 15140ac07820SSatish Balay PLogObjectParent(mat,a->B); 15150ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 15160ac07820SSatish Balay if (flg) { 15170ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 15180ac07820SSatish Balay } 15190ac07820SSatish Balay *newmat = mat; 15200ac07820SSatish Balay return 0; 15210ac07820SSatish Balay } 152257b952d6SSatish Balay 152357b952d6SSatish Balay #include "sys.h" 152457b952d6SSatish Balay 15255615d1e5SSatish Balay #undef __FUNC__ 15265615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 152757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 152857b952d6SSatish Balay { 152957b952d6SSatish Balay Mat A; 153057b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 153157b952d6SSatish Balay Scalar *vals,*buf; 153257b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 153357b952d6SSatish Balay MPI_Status status; 1534cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 153557b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 153657b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 153757b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 153857b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 153957b952d6SSatish Balay 154057b952d6SSatish Balay 154157b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 154257b952d6SSatish Balay bs2 = bs*bs; 154357b952d6SSatish Balay 154457b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 154557b952d6SSatish Balay if (!rank) { 154657b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 154757b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1548e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 154957b952d6SSatish Balay } 155057b952d6SSatish Balay 155157b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 155257b952d6SSatish Balay M = header[1]; N = header[2]; 155357b952d6SSatish Balay 1554e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 155557b952d6SSatish Balay 155657b952d6SSatish Balay /* 155757b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 155857b952d6SSatish Balay divisible by the blocksize 155957b952d6SSatish Balay */ 156057b952d6SSatish Balay Mbs = M/bs; 156157b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 156257b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 156357b952d6SSatish Balay else Mbs++; 156457b952d6SSatish Balay if (extra_rows &&!rank) { 1565b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 156657b952d6SSatish Balay } 1567537820f0SBarry Smith 156857b952d6SSatish Balay /* determine ownership of all rows */ 156957b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 157057b952d6SSatish Balay m = mbs * bs; 1571cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1572cee3aa6bSSatish Balay browners = rowners + size + 1; 157357b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 157457b952d6SSatish Balay rowners[0] = 0; 1575cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1576cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 157757b952d6SSatish Balay rstart = rowners[rank]; 157857b952d6SSatish Balay rend = rowners[rank+1]; 157957b952d6SSatish Balay 158057b952d6SSatish Balay /* distribute row lengths to all processors */ 158157b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 158257b952d6SSatish Balay if (!rank) { 158357b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 158457b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 158557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 158657b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1587cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1588cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 158957b952d6SSatish Balay PetscFree(sndcounts); 159057b952d6SSatish Balay } 159157b952d6SSatish Balay else { 159257b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 159357b952d6SSatish Balay } 159457b952d6SSatish Balay 159557b952d6SSatish Balay if (!rank) { 159657b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 159757b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 159857b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 159957b952d6SSatish Balay for ( i=0; i<size; i++ ) { 160057b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 160157b952d6SSatish Balay procsnz[i] += rowlengths[j]; 160257b952d6SSatish Balay } 160357b952d6SSatish Balay } 160457b952d6SSatish Balay PetscFree(rowlengths); 160557b952d6SSatish Balay 160657b952d6SSatish Balay /* determine max buffer needed and allocate it */ 160757b952d6SSatish Balay maxnz = 0; 160857b952d6SSatish Balay for ( i=0; i<size; i++ ) { 160957b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 161057b952d6SSatish Balay } 161157b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 161257b952d6SSatish Balay 161357b952d6SSatish Balay /* read in my part of the matrix column indices */ 161457b952d6SSatish Balay nz = procsnz[0]; 161557b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 161657b952d6SSatish Balay mycols = ibuf; 1617cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 161857b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1619cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1620cee3aa6bSSatish Balay 162157b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 162257b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 162357b952d6SSatish Balay nz = procsnz[i]; 162457b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 162557b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 162657b952d6SSatish Balay } 162757b952d6SSatish Balay /* read in the stuff for the last proc */ 162857b952d6SSatish Balay if ( size != 1 ) { 162957b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 163057b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 163157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1632cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 163357b952d6SSatish Balay } 163457b952d6SSatish Balay PetscFree(cols); 163557b952d6SSatish Balay } 163657b952d6SSatish Balay else { 163757b952d6SSatish Balay /* determine buffer space needed for message */ 163857b952d6SSatish Balay nz = 0; 163957b952d6SSatish Balay for ( i=0; i<m; i++ ) { 164057b952d6SSatish Balay nz += locrowlens[i]; 164157b952d6SSatish Balay } 164257b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 164357b952d6SSatish Balay mycols = ibuf; 164457b952d6SSatish Balay /* receive message of column indices*/ 164557b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 164657b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1647e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 164857b952d6SSatish Balay } 164957b952d6SSatish Balay 165057b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1651cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1652cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 165357b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1654cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 165557b952d6SSatish Balay masked1 = mask + Mbs; 165657b952d6SSatish Balay masked2 = masked1 + Mbs; 165757b952d6SSatish Balay rowcount = 0; nzcount = 0; 165857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 165957b952d6SSatish Balay dcount = 0; 166057b952d6SSatish Balay odcount = 0; 166157b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 166257b952d6SSatish Balay kmax = locrowlens[rowcount]; 166357b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 166457b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 166557b952d6SSatish Balay if (!mask[tmp]) { 166657b952d6SSatish Balay mask[tmp] = 1; 166757b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 166857b952d6SSatish Balay else masked1[dcount++] = tmp; 166957b952d6SSatish Balay } 167057b952d6SSatish Balay } 167157b952d6SSatish Balay rowcount++; 167257b952d6SSatish Balay } 1673cee3aa6bSSatish Balay 167457b952d6SSatish Balay dlens[i] = dcount; 167557b952d6SSatish Balay odlens[i] = odcount; 1676cee3aa6bSSatish Balay 167757b952d6SSatish Balay /* zero out the mask elements we set */ 167857b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 167957b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 168057b952d6SSatish Balay } 1681cee3aa6bSSatish Balay 168257b952d6SSatish Balay /* create our matrix */ 1683537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1684537820f0SBarry Smith CHKERRQ(ierr); 168557b952d6SSatish Balay A = *newmat; 16866d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 168757b952d6SSatish Balay 168857b952d6SSatish Balay if (!rank) { 168957b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 169057b952d6SSatish Balay /* read in my part of the matrix numerical values */ 169157b952d6SSatish Balay nz = procsnz[0]; 169257b952d6SSatish Balay vals = buf; 1693cee3aa6bSSatish Balay mycols = ibuf; 1694cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 169557b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1696cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1697537820f0SBarry Smith 169857b952d6SSatish Balay /* insert into matrix */ 169957b952d6SSatish Balay jj = rstart*bs; 170057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 170157b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 170257b952d6SSatish Balay mycols += locrowlens[i]; 170357b952d6SSatish Balay vals += locrowlens[i]; 170457b952d6SSatish Balay jj++; 170557b952d6SSatish Balay } 170657b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 170757b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 170857b952d6SSatish Balay nz = procsnz[i]; 170957b952d6SSatish Balay vals = buf; 171057b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 171157b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 171257b952d6SSatish Balay } 171357b952d6SSatish Balay /* the last proc */ 171457b952d6SSatish Balay if ( size != 1 ){ 171557b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1716cee3aa6bSSatish Balay vals = buf; 171757b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 171857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1719cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 172057b952d6SSatish Balay } 172157b952d6SSatish Balay PetscFree(procsnz); 172257b952d6SSatish Balay } 172357b952d6SSatish Balay else { 172457b952d6SSatish Balay /* receive numeric values */ 172557b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 172657b952d6SSatish Balay 172757b952d6SSatish Balay /* receive message of values*/ 172857b952d6SSatish Balay vals = buf; 1729cee3aa6bSSatish Balay mycols = ibuf; 173057b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 173157b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1732e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 173357b952d6SSatish Balay 173457b952d6SSatish Balay /* insert into matrix */ 173557b952d6SSatish Balay jj = rstart*bs; 1736cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 173757b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 173857b952d6SSatish Balay mycols += locrowlens[i]; 173957b952d6SSatish Balay vals += locrowlens[i]; 174057b952d6SSatish Balay jj++; 174157b952d6SSatish Balay } 174257b952d6SSatish Balay } 174357b952d6SSatish Balay PetscFree(locrowlens); 174457b952d6SSatish Balay PetscFree(buf); 174557b952d6SSatish Balay PetscFree(ibuf); 174657b952d6SSatish Balay PetscFree(rowners); 174757b952d6SSatish Balay PetscFree(dlens); 1748cee3aa6bSSatish Balay PetscFree(mask); 17496d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 17506d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 175157b952d6SSatish Balay return 0; 175257b952d6SSatish Balay } 175357b952d6SSatish Balay 175457b952d6SSatish Balay 1755