179bdfe76SSatish Balay #ifndef lint 2*80c1aa95SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.46 1997/01/06 20:25:27 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 243b2fbd54SBarry Smith 25537820f0SBarry Smith /* 26537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2957b952d6SSatish Balay length of colmap equals the global matrix length. 3057b952d6SSatish Balay */ 315615d1e5SSatish Balay #undef __FUNC__ 325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3457b952d6SSatish Balay { 3557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 37928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3857b952d6SSatish Balay 3957b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 4057b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4157b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 42928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4357b952d6SSatish Balay return 0; 4457b952d6SSatish Balay } 4557b952d6SSatish Balay 465615d1e5SSatish Balay #undef __FUNC__ 475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 5057b952d6SSatish Balay { 513b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5257b952d6SSatish Balay int ierr; 533b2fbd54SBarry Smith if (aij->size == 1) { 543b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 55e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 563b2fbd54SBarry Smith return 0; 573b2fbd54SBarry Smith } 583b2fbd54SBarry Smith 595615d1e5SSatish Balay #undef __FUNC__ 605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 623b2fbd54SBarry Smith PetscTruth *done) 633b2fbd54SBarry Smith { 643b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 653b2fbd54SBarry Smith int ierr; 663b2fbd54SBarry Smith if (aij->size == 1) { 673b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 68e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6957b952d6SSatish Balay return 0; 7057b952d6SSatish Balay } 71*80c1aa95SSatish Balay #define CHUNKSIZE 10 72*80c1aa95SSatish Balay 73*80c1aa95SSatish Balay #define MatSetValues_SeqBAIJ_A_Private( new_A, row, col, value) \ 74*80c1aa95SSatish Balay { \ 75*80c1aa95SSatish Balay \ 76*80c1aa95SSatish Balay brow = row/bs; \ 77*80c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 78*80c1aa95SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; \ 79*80c1aa95SSatish Balay bcol = col/bs; \ 80*80c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 81*80c1aa95SSatish Balay for ( _i=0; _i<nrow; _i++ ) { \ 82*80c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 83*80c1aa95SSatish Balay if (rp[_i] == bcol) { \ 84*80c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 85*80c1aa95SSatish Balay *bap = value; \ 86*80c1aa95SSatish Balay goto _noinsert; \ 87*80c1aa95SSatish Balay } \ 88*80c1aa95SSatish Balay } \ 89*80c1aa95SSatish Balay if (nrow >= rmax) { \ 90*80c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 91*80c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 92*80c1aa95SSatish Balay Scalar *new_a; \ 93*80c1aa95SSatish Balay \ 94*80c1aa95SSatish Balay /* malloc new storage space */ \ 95*80c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 96*80c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 97*80c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 98*80c1aa95SSatish Balay new_i = new_j + new_nz; \ 99*80c1aa95SSatish Balay \ 100*80c1aa95SSatish Balay /* copy over old data into new slots */ \ 101*80c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 102*80c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 103*80c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 104*80c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 105*80c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 106*80c1aa95SSatish Balay len*sizeof(int)); \ 107*80c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 108*80c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 109*80c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 110*80c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 111*80c1aa95SSatish Balay /* free up old matrix storage */ \ 112*80c1aa95SSatish Balay PetscFree(a->a); \ 113*80c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 114*80c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 115*80c1aa95SSatish Balay a->singlemalloc = 1; \ 116*80c1aa95SSatish Balay \ 117*80c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 118*80c1aa95SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; \ 119*80c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 120*80c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 121*80c1aa95SSatish Balay a->reallocs++; \ 122*80c1aa95SSatish Balay a->nz++; \ 123*80c1aa95SSatish Balay } \ 124*80c1aa95SSatish Balay N = nrow++ - 1; \ 125*80c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 126*80c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 127*80c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 128*80c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 129*80c1aa95SSatish Balay } \ 130*80c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 131*80c1aa95SSatish Balay rp[_i] = bcol; \ 132*80c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 133*80c1aa95SSatish Balay _noinsert:; \ 134*80c1aa95SSatish Balay ailen[brow] = nrow; \ 135*80c1aa95SSatish Balay } 13657b952d6SSatish Balay 137639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 1385615d1e5SSatish Balay #undef __FUNC__ 1395615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 14057b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 14157b952d6SSatish Balay { 14257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 14357b952d6SSatish Balay Scalar value; 1444fa0d573SSatish Balay int ierr,i,j,row,col; 1454fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1464fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1474fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 14857b952d6SSatish Balay 149*80c1aa95SSatish Balay Mat A = baij->A; 150*80c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 151*80c1aa95SSatish Balay int *rp,ii,nrow,_i,rmax,N; 152*80c1aa95SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 153*80c1aa95SSatish Balay int *aj=a->j,brow,bcol; 154*80c1aa95SSatish Balay int ridx,cidx,bs2=a->bs2; 155*80c1aa95SSatish Balay Scalar *ap,*aa=a->a,*bap; 156*80c1aa95SSatish Balay 1572c3acbe9SBarry Smith #if defined(PETSC_BOPT_g) 15857b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 159e3372554SBarry Smith SETERRQ(1,0,"Cannot mix inserts and adds"); 16057b952d6SSatish Balay } 1612c3acbe9SBarry Smith #endif 16257b952d6SSatish Balay baij->insertmode = addv; 16357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 164639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 165e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 166e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 167639f9d9dSBarry Smith #endif 16857b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 16957b952d6SSatish Balay row = im[i] - rstart_orig; 17057b952d6SSatish Balay for ( j=0; j<n; j++ ) { 17157b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 17257b952d6SSatish Balay col = in[j] - cstart_orig; 17357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 174*80c1aa95SSatish Balay MatSetValues_SeqBAIJ_A_Private(baij->A,row,col,value); 175*80c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 17657b952d6SSatish Balay } 177639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 178e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 179e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 180639f9d9dSBarry Smith #endif 18157b952d6SSatish Balay else { 18257b952d6SSatish Balay if (mat->was_assembled) { 183905e6a2fSBarry Smith if (!baij->colmap) { 184905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 185905e6a2fSBarry Smith } 186905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 18757b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 18857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 18957b952d6SSatish Balay col = in[j]; 19057b952d6SSatish Balay } 19157b952d6SSatish Balay } 19257b952d6SSatish Balay else col = in[j]; 19357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 194639f9d9dSBarry Smith ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 19557b952d6SSatish Balay } 19657b952d6SSatish Balay } 19757b952d6SSatish Balay } 19857b952d6SSatish Balay else { 19990f02eecSBarry Smith if (roworiented && !baij->donotstash) { 20057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 20157b952d6SSatish Balay } 20257b952d6SSatish Balay else { 20390f02eecSBarry Smith if (!baij->donotstash) { 20457b952d6SSatish Balay row = im[i]; 20557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 20657b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 20757b952d6SSatish Balay } 20857b952d6SSatish Balay } 20957b952d6SSatish Balay } 21057b952d6SSatish Balay } 21190f02eecSBarry Smith } 21257b952d6SSatish Balay return 0; 21357b952d6SSatish Balay } 21457b952d6SSatish Balay 2155615d1e5SSatish Balay #undef __FUNC__ 2165615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 217d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 218d6de1c52SSatish Balay { 219d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 220d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 221d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 222d6de1c52SSatish Balay 223d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 224e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 225e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 226d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 227d6de1c52SSatish Balay row = idxm[i] - bsrstart; 228d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 229e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 230e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 231d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 232d6de1c52SSatish Balay col = idxn[j] - bscstart; 233d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 234d6de1c52SSatish Balay } 235d6de1c52SSatish Balay else { 236905e6a2fSBarry Smith if (!baij->colmap) { 237905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238905e6a2fSBarry Smith } 239e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 240dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 241d9d09a02SSatish Balay else { 242dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 243d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 244d6de1c52SSatish Balay } 245d6de1c52SSatish Balay } 246d6de1c52SSatish Balay } 247d9d09a02SSatish Balay } 248d6de1c52SSatish Balay else { 249e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 250d6de1c52SSatish Balay } 251d6de1c52SSatish Balay } 252d6de1c52SSatish Balay return 0; 253d6de1c52SSatish Balay } 254d6de1c52SSatish Balay 2555615d1e5SSatish Balay #undef __FUNC__ 2565615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 257d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 258d6de1c52SSatish Balay { 259d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 260d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 261acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 262d6de1c52SSatish Balay double sum = 0.0; 263d6de1c52SSatish Balay Scalar *v; 264d6de1c52SSatish Balay 265d6de1c52SSatish Balay if (baij->size == 1) { 266d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 267d6de1c52SSatish Balay } else { 268d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 269d6de1c52SSatish Balay v = amat->a; 270d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 271d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 272d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 273d6de1c52SSatish Balay #else 274d6de1c52SSatish Balay sum += (*v)*(*v); v++; 275d6de1c52SSatish Balay #endif 276d6de1c52SSatish Balay } 277d6de1c52SSatish Balay v = bmat->a; 278d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 279d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 280d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 281d6de1c52SSatish Balay #else 282d6de1c52SSatish Balay sum += (*v)*(*v); v++; 283d6de1c52SSatish Balay #endif 284d6de1c52SSatish Balay } 285d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 286d6de1c52SSatish Balay *norm = sqrt(*norm); 287d6de1c52SSatish Balay } 288acdf5bf4SSatish Balay else 289e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 290d6de1c52SSatish Balay } 291d6de1c52SSatish Balay return 0; 292d6de1c52SSatish Balay } 29357b952d6SSatish Balay 2945615d1e5SSatish Balay #undef __FUNC__ 2955615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 29657b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 29757b952d6SSatish Balay { 29857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 29957b952d6SSatish Balay MPI_Comm comm = mat->comm; 30057b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 30157b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 30257b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 30357b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 30457b952d6SSatish Balay InsertMode addv; 30557b952d6SSatish Balay Scalar *rvalues,*svalues; 30657b952d6SSatish Balay 30757b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 30857b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 30957b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 310e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 31157b952d6SSatish Balay } 31257b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 31357b952d6SSatish Balay 31457b952d6SSatish Balay /* first count number of contributors to each processor */ 31557b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 31657b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 31757b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 31857b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 31957b952d6SSatish Balay idx = baij->stash.idx[i]; 32057b952d6SSatish Balay for ( j=0; j<size; j++ ) { 32157b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 32257b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 32357b952d6SSatish Balay } 32457b952d6SSatish Balay } 32557b952d6SSatish Balay } 32657b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 32757b952d6SSatish Balay 32857b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 32957b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 33057b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 33157b952d6SSatish Balay nreceives = work[rank]; 33257b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 33357b952d6SSatish Balay nmax = work[rank]; 33457b952d6SSatish Balay PetscFree(work); 33557b952d6SSatish Balay 33657b952d6SSatish Balay /* post receives: 33757b952d6SSatish Balay 1) each message will consist of ordered pairs 33857b952d6SSatish Balay (global index,value) we store the global index as a double 33957b952d6SSatish Balay to simplify the message passing. 34057b952d6SSatish Balay 2) since we don't know how long each individual message is we 34157b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 34257b952d6SSatish Balay this is a lot of wasted space. 34357b952d6SSatish Balay 34457b952d6SSatish Balay 34557b952d6SSatish Balay This could be done better. 34657b952d6SSatish Balay */ 34757b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 34857b952d6SSatish Balay CHKPTRQ(rvalues); 34957b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 35057b952d6SSatish Balay CHKPTRQ(recv_waits); 35157b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 35257b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 35357b952d6SSatish Balay comm,recv_waits+i); 35457b952d6SSatish Balay } 35557b952d6SSatish Balay 35657b952d6SSatish Balay /* do sends: 35757b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 35857b952d6SSatish Balay the ith processor 35957b952d6SSatish Balay */ 36057b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 36157b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 36257b952d6SSatish Balay CHKPTRQ(send_waits); 36357b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 36457b952d6SSatish Balay starts[0] = 0; 36557b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 36657b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 36757b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 36857b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 36957b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 37057b952d6SSatish Balay } 37157b952d6SSatish Balay PetscFree(owner); 37257b952d6SSatish Balay starts[0] = 0; 37357b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 37457b952d6SSatish Balay count = 0; 37557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 37657b952d6SSatish Balay if (procs[i]) { 37757b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 37857b952d6SSatish Balay comm,send_waits+count++); 37957b952d6SSatish Balay } 38057b952d6SSatish Balay } 38157b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 38257b952d6SSatish Balay 38357b952d6SSatish Balay /* Free cache space */ 384d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 38557b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 38657b952d6SSatish Balay 38757b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 38857b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 38957b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 39057b952d6SSatish Balay baij->rmax = nmax; 39157b952d6SSatish Balay 39257b952d6SSatish Balay return 0; 39357b952d6SSatish Balay } 39457b952d6SSatish Balay 39557b952d6SSatish Balay 3965615d1e5SSatish Balay #undef __FUNC__ 3975615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 39857b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 39957b952d6SSatish Balay { 40057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 40157b952d6SSatish Balay MPI_Status *send_status,recv_status; 40257b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 40357b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 40457b952d6SSatish Balay Scalar *values,val; 40557b952d6SSatish Balay InsertMode addv = baij->insertmode; 40657b952d6SSatish Balay 40757b952d6SSatish Balay /* wait on receives */ 40857b952d6SSatish Balay while (count) { 40957b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 41057b952d6SSatish Balay /* unpack receives into our local space */ 41157b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 41257b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 41357b952d6SSatish Balay n = n/3; 41457b952d6SSatish Balay for ( i=0; i<n; i++ ) { 41557b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 41657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 41757b952d6SSatish Balay val = values[3*i+2]; 41857b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 41957b952d6SSatish Balay col -= baij->cstart*bs; 42057b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 42157b952d6SSatish Balay } 42257b952d6SSatish Balay else { 42357b952d6SSatish Balay if (mat->was_assembled) { 424905e6a2fSBarry Smith if (!baij->colmap) { 425905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 426905e6a2fSBarry Smith } 427905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 42857b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 42957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 43057b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 43157b952d6SSatish Balay } 43257b952d6SSatish Balay } 43357b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 43457b952d6SSatish Balay } 43557b952d6SSatish Balay } 43657b952d6SSatish Balay count--; 43757b952d6SSatish Balay } 43857b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 43957b952d6SSatish Balay 44057b952d6SSatish Balay /* wait on sends */ 44157b952d6SSatish Balay if (baij->nsends) { 44257b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 44357b952d6SSatish Balay CHKPTRQ(send_status); 44457b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 44557b952d6SSatish Balay PetscFree(send_status); 44657b952d6SSatish Balay } 44757b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 44857b952d6SSatish Balay 44957b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 45057b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 45157b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 45257b952d6SSatish Balay 45357b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 45457b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 45557b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 45657b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 45757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 45857b952d6SSatish Balay } 45957b952d6SSatish Balay 4606d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 46157b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 46257b952d6SSatish Balay } 46357b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 46457b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 46557b952d6SSatish Balay 46657b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 46757b952d6SSatish Balay return 0; 46857b952d6SSatish Balay } 46957b952d6SSatish Balay 4705615d1e5SSatish Balay #undef __FUNC__ 4715615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 47257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 47357b952d6SSatish Balay { 47457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 47557b952d6SSatish Balay int ierr; 47657b952d6SSatish Balay 47757b952d6SSatish Balay if (baij->size == 1) { 47857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 47957b952d6SSatish Balay } 480e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 48157b952d6SSatish Balay return 0; 48257b952d6SSatish Balay } 48357b952d6SSatish Balay 4845615d1e5SSatish Balay #undef __FUNC__ 4855615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 48657b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 48757b952d6SSatish Balay { 48857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 489cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 49057b952d6SSatish Balay FILE *fd; 49157b952d6SSatish Balay ViewerType vtype; 49257b952d6SSatish Balay 49357b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 49457b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 49557b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 496639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 4974e220ebcSLois Curfman McInnes MatInfo info; 49857b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 49957b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 5004e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 50157b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 50257b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 5034e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 5044e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 5054e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 5064e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 5074e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 5084e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 50957b952d6SSatish Balay fflush(fd); 51057b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 51157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 51257b952d6SSatish Balay return 0; 51357b952d6SSatish Balay } 514639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 515bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 51657b952d6SSatish Balay return 0; 51757b952d6SSatish Balay } 51857b952d6SSatish Balay } 51957b952d6SSatish Balay 52057b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 52157b952d6SSatish Balay Draw draw; 52257b952d6SSatish Balay PetscTruth isnull; 52357b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 52457b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 52557b952d6SSatish Balay } 52657b952d6SSatish Balay 52757b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 52857b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 52957b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 53057b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 53157b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 53257b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 53357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 53457b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 53557b952d6SSatish Balay fflush(fd); 53657b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 53757b952d6SSatish Balay } 53857b952d6SSatish Balay else { 53957b952d6SSatish Balay int size = baij->size; 54057b952d6SSatish Balay rank = baij->rank; 54157b952d6SSatish Balay if (size == 1) { 54257b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 54357b952d6SSatish Balay } 54457b952d6SSatish Balay else { 54557b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 54657b952d6SSatish Balay Mat A; 54757b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 54857b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 54957b952d6SSatish Balay int mbs=baij->mbs; 55057b952d6SSatish Balay Scalar *a; 55157b952d6SSatish Balay 55257b952d6SSatish Balay if (!rank) { 553cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 55457b952d6SSatish Balay CHKERRQ(ierr); 55557b952d6SSatish Balay } 55657b952d6SSatish Balay else { 557cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 55857b952d6SSatish Balay CHKERRQ(ierr); 55957b952d6SSatish Balay } 56057b952d6SSatish Balay PLogObjectParent(mat,A); 56157b952d6SSatish Balay 56257b952d6SSatish Balay /* copy over the A part */ 56357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 56457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 56557b952d6SSatish Balay row = baij->rstart; 56657b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 56757b952d6SSatish Balay 56857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 56957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 57057b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 57157b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 57257b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 57357b952d6SSatish Balay for (k=0; k<bs; k++ ) { 574cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 575cee3aa6bSSatish Balay col++; a += bs; 57657b952d6SSatish Balay } 57757b952d6SSatish Balay } 57857b952d6SSatish Balay } 57957b952d6SSatish Balay /* copy over the B part */ 58057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 58157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 58257b952d6SSatish Balay row = baij->rstart*bs; 58357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 58457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 58557b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 58657b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 58757b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 58857b952d6SSatish Balay for (k=0; k<bs; k++ ) { 589cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 590cee3aa6bSSatish Balay col++; a += bs; 59157b952d6SSatish Balay } 59257b952d6SSatish Balay } 59357b952d6SSatish Balay } 59457b952d6SSatish Balay PetscFree(rvals); 5956d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5966d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 59757b952d6SSatish Balay if (!rank) { 59857b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 59957b952d6SSatish Balay } 60057b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 60157b952d6SSatish Balay } 60257b952d6SSatish Balay } 60357b952d6SSatish Balay return 0; 60457b952d6SSatish Balay } 60557b952d6SSatish Balay 60657b952d6SSatish Balay 60757b952d6SSatish Balay 6085615d1e5SSatish Balay #undef __FUNC__ 6095615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 61057b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 61157b952d6SSatish Balay { 61257b952d6SSatish Balay Mat mat = (Mat) obj; 61357b952d6SSatish Balay int ierr; 61457b952d6SSatish Balay ViewerType vtype; 61557b952d6SSatish Balay 61657b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 61757b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 61857b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 61957b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 62057b952d6SSatish Balay } 62157b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 62257b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 62357b952d6SSatish Balay } 62457b952d6SSatish Balay return 0; 62557b952d6SSatish Balay } 62657b952d6SSatish Balay 6275615d1e5SSatish Balay #undef __FUNC__ 6285615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 62979bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 63079bdfe76SSatish Balay { 63179bdfe76SSatish Balay Mat mat = (Mat) obj; 63279bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 63379bdfe76SSatish Balay int ierr; 63479bdfe76SSatish Balay 63579bdfe76SSatish Balay #if defined(PETSC_LOG) 63679bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 63779bdfe76SSatish Balay #endif 63879bdfe76SSatish Balay 63979bdfe76SSatish Balay PetscFree(baij->rowners); 64079bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 64179bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 64279bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 64379bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 64479bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 64579bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 64679bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 64779bdfe76SSatish Balay PetscFree(baij); 64890f02eecSBarry Smith if (mat->mapping) { 64990f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 65090f02eecSBarry Smith } 65179bdfe76SSatish Balay PLogObjectDestroy(mat); 65279bdfe76SSatish Balay PetscHeaderDestroy(mat); 65379bdfe76SSatish Balay return 0; 65479bdfe76SSatish Balay } 65579bdfe76SSatish Balay 6565615d1e5SSatish Balay #undef __FUNC__ 6575615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 658cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 659cee3aa6bSSatish Balay { 660cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 66147b4a8eaSLois Curfman McInnes int ierr, nt; 662cee3aa6bSSatish Balay 663c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 66447b4a8eaSLois Curfman McInnes if (nt != a->n) { 665e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and xx"); 66647b4a8eaSLois Curfman McInnes } 667c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 66847b4a8eaSLois Curfman McInnes if (nt != a->m) { 669e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 67047b4a8eaSLois Curfman McInnes } 67143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 672cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 67343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 674cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 67543a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 676cee3aa6bSSatish Balay return 0; 677cee3aa6bSSatish Balay } 678cee3aa6bSSatish Balay 6795615d1e5SSatish Balay #undef __FUNC__ 6805615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 681cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 682cee3aa6bSSatish Balay { 683cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 684cee3aa6bSSatish Balay int ierr; 68543a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 686cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 68743a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 688cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 689cee3aa6bSSatish Balay return 0; 690cee3aa6bSSatish Balay } 691cee3aa6bSSatish Balay 6925615d1e5SSatish Balay #undef __FUNC__ 6935615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 694cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 695cee3aa6bSSatish Balay { 696cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 697cee3aa6bSSatish Balay int ierr; 698cee3aa6bSSatish Balay 699cee3aa6bSSatish Balay /* do nondiagonal part */ 700cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 701cee3aa6bSSatish Balay /* send it on its way */ 702537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 703cee3aa6bSSatish Balay /* do local part */ 704cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 705cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 706cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 707cee3aa6bSSatish Balay /* but is not perhaps always true. */ 708639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 709cee3aa6bSSatish Balay return 0; 710cee3aa6bSSatish Balay } 711cee3aa6bSSatish Balay 7125615d1e5SSatish Balay #undef __FUNC__ 7135615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 714cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 715cee3aa6bSSatish Balay { 716cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 717cee3aa6bSSatish Balay int ierr; 718cee3aa6bSSatish Balay 719cee3aa6bSSatish Balay /* do nondiagonal part */ 720cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 721cee3aa6bSSatish Balay /* send it on its way */ 722537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 723cee3aa6bSSatish Balay /* do local part */ 724cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 725cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 726cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 727cee3aa6bSSatish Balay /* but is not perhaps always true. */ 728537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 729cee3aa6bSSatish Balay return 0; 730cee3aa6bSSatish Balay } 731cee3aa6bSSatish Balay 732cee3aa6bSSatish Balay /* 733cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 734cee3aa6bSSatish Balay diagonal block 735cee3aa6bSSatish Balay */ 7365615d1e5SSatish Balay #undef __FUNC__ 7375615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 738cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 739cee3aa6bSSatish Balay { 740cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 741cee3aa6bSSatish Balay if (a->M != a->N) 742e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 743cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 744cee3aa6bSSatish Balay } 745cee3aa6bSSatish Balay 7465615d1e5SSatish Balay #undef __FUNC__ 7475615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 748cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 749cee3aa6bSSatish Balay { 750cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 751cee3aa6bSSatish Balay int ierr; 752cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 753cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 754cee3aa6bSSatish Balay return 0; 755cee3aa6bSSatish Balay } 756026e39d0SSatish Balay 7575615d1e5SSatish Balay #undef __FUNC__ 7585615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 75957b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 76057b952d6SSatish Balay { 76157b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 76257b952d6SSatish Balay *m = mat->M; *n = mat->N; 76357b952d6SSatish Balay return 0; 76457b952d6SSatish Balay } 76557b952d6SSatish Balay 7665615d1e5SSatish Balay #undef __FUNC__ 7675615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 76857b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 76957b952d6SSatish Balay { 77057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 77157b952d6SSatish Balay *m = mat->m; *n = mat->N; 77257b952d6SSatish Balay return 0; 77357b952d6SSatish Balay } 77457b952d6SSatish Balay 7755615d1e5SSatish Balay #undef __FUNC__ 7765615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 77757b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 77857b952d6SSatish Balay { 77957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 78057b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 78157b952d6SSatish Balay return 0; 78257b952d6SSatish Balay } 78357b952d6SSatish Balay 784acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 785acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 786acdf5bf4SSatish Balay 7875615d1e5SSatish Balay #undef __FUNC__ 7885615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 789acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 790acdf5bf4SSatish Balay { 791acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 792acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 793acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 794d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 795d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 796acdf5bf4SSatish Balay 797e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 798acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 799acdf5bf4SSatish Balay 800acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 801acdf5bf4SSatish Balay /* 802acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 803acdf5bf4SSatish Balay */ 804acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 805bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 806bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 807acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 808acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 809acdf5bf4SSatish Balay } 810acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 811acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 812acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 813acdf5bf4SSatish Balay } 814acdf5bf4SSatish Balay 815acdf5bf4SSatish Balay 816e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 817d9d09a02SSatish Balay lrow = row - brstart; 818acdf5bf4SSatish Balay 819acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 820acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 821acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 822acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 823acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 824acdf5bf4SSatish Balay nztot = nzA + nzB; 825acdf5bf4SSatish Balay 826acdf5bf4SSatish Balay cmap = mat->garray; 827acdf5bf4SSatish Balay if (v || idx) { 828acdf5bf4SSatish Balay if (nztot) { 829acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 830acdf5bf4SSatish Balay int imark = -1; 831acdf5bf4SSatish Balay if (v) { 832acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 833acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 834d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 835acdf5bf4SSatish Balay else break; 836acdf5bf4SSatish Balay } 837acdf5bf4SSatish Balay imark = i; 838acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 839acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 840acdf5bf4SSatish Balay } 841acdf5bf4SSatish Balay if (idx) { 842acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 843acdf5bf4SSatish Balay if (imark > -1) { 844acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 845bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 846acdf5bf4SSatish Balay } 847acdf5bf4SSatish Balay } else { 848acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 849d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 850d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 851acdf5bf4SSatish Balay else break; 852acdf5bf4SSatish Balay } 853acdf5bf4SSatish Balay imark = i; 854acdf5bf4SSatish Balay } 855d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 856d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 857acdf5bf4SSatish Balay } 858acdf5bf4SSatish Balay } 859d212a18eSSatish Balay else { 860d212a18eSSatish Balay if (idx) *idx = 0; 861d212a18eSSatish Balay if (v) *v = 0; 862d212a18eSSatish Balay } 863acdf5bf4SSatish Balay } 864acdf5bf4SSatish Balay *nz = nztot; 865acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 866acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 867acdf5bf4SSatish Balay return 0; 868acdf5bf4SSatish Balay } 869acdf5bf4SSatish Balay 8705615d1e5SSatish Balay #undef __FUNC__ 8715615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 872acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 873acdf5bf4SSatish Balay { 874acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 875acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 876e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 877acdf5bf4SSatish Balay } 878acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 879acdf5bf4SSatish Balay return 0; 880acdf5bf4SSatish Balay } 881acdf5bf4SSatish Balay 8825615d1e5SSatish Balay #undef __FUNC__ 8835615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 8845a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 8855a838052SSatish Balay { 8865a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 8875a838052SSatish Balay *bs = baij->bs; 8885a838052SSatish Balay return 0; 8895a838052SSatish Balay } 8905a838052SSatish Balay 8915615d1e5SSatish Balay #undef __FUNC__ 8925615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 89358667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A) 89458667388SSatish Balay { 89558667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 89658667388SSatish Balay int ierr; 89758667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 89858667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 89958667388SSatish Balay return 0; 90058667388SSatish Balay } 9010ac07820SSatish Balay 9025615d1e5SSatish Balay #undef __FUNC__ 9035615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 9044e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 9050ac07820SSatish Balay { 9064e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 9074e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 9087d57db60SLois Curfman McInnes int ierr; 9097d57db60SLois Curfman McInnes double isend[5], irecv[5]; 9100ac07820SSatish Balay 9114e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 9124e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 9134e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 9144e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 9154e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 9164e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 9174e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 9184e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 9194e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 9200ac07820SSatish Balay if (flag == MAT_LOCAL) { 9214e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 9224e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 9234e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 9244e220ebcSLois Curfman McInnes info->memory = isend[3]; 9254e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 9260ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 9270ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm); 9284e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9294e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9304e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9314e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9324e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 9330ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 9340ac07820SSatish Balay MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm); 9354e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 9364e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 9374e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 9384e220ebcSLois Curfman McInnes info->memory = irecv[3]; 9394e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 9400ac07820SSatish Balay } 9414e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 9424e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 9434e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 9440ac07820SSatish Balay return 0; 9450ac07820SSatish Balay } 9460ac07820SSatish Balay 9475615d1e5SSatish Balay #undef __FUNC__ 9485615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 94958667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op) 95058667388SSatish Balay { 95158667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 95258667388SSatish Balay 95358667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 95458667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 9556da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 956b1fbbac0SLois Curfman McInnes op == MAT_COLUMNS_SORTED) { 957b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 958b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 959b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 960aeafbbfcSLois Curfman McInnes a->roworiented = 1; 96158667388SSatish Balay MatSetOption(a->A,op); 96258667388SSatish Balay MatSetOption(a->B,op); 963b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 9646da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 96558667388SSatish Balay op == MAT_SYMMETRIC || 96658667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 96758667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 96858667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 96958667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 97058667388SSatish Balay a->roworiented = 0; 97158667388SSatish Balay MatSetOption(a->A,op); 97258667388SSatish Balay MatSetOption(a->B,op); 97390f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 97490f02eecSBarry Smith a->donotstash = 1; 97590f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 976e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 97758667388SSatish Balay else 978e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 97958667388SSatish Balay return 0; 98058667388SSatish Balay } 98158667388SSatish Balay 9825615d1e5SSatish Balay #undef __FUNC__ 9835615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 9840ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 9850ac07820SSatish Balay { 9860ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 9870ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 9880ac07820SSatish Balay Mat B; 9890ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 9900ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 9910ac07820SSatish Balay Scalar *a; 9920ac07820SSatish Balay 9930ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 994e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 9950ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 9960ac07820SSatish Balay CHKERRQ(ierr); 9970ac07820SSatish Balay 9980ac07820SSatish Balay /* copy over the A part */ 9990ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 10000ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 10010ac07820SSatish Balay row = baij->rstart; 10020ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 10030ac07820SSatish Balay 10040ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 10050ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 10060ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 10070ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 10080ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 10090ac07820SSatish Balay for (k=0; k<bs; k++ ) { 10100ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 10110ac07820SSatish Balay col++; a += bs; 10120ac07820SSatish Balay } 10130ac07820SSatish Balay } 10140ac07820SSatish Balay } 10150ac07820SSatish Balay /* copy over the B part */ 10160ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 10170ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 10180ac07820SSatish Balay row = baij->rstart*bs; 10190ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 10200ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 10210ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 10220ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 10230ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 10240ac07820SSatish Balay for (k=0; k<bs; k++ ) { 10250ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 10260ac07820SSatish Balay col++; a += bs; 10270ac07820SSatish Balay } 10280ac07820SSatish Balay } 10290ac07820SSatish Balay } 10300ac07820SSatish Balay PetscFree(rvals); 10310ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10320ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10330ac07820SSatish Balay 10340ac07820SSatish Balay if (matout != PETSC_NULL) { 10350ac07820SSatish Balay *matout = B; 10360ac07820SSatish Balay } else { 10370ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 10380ac07820SSatish Balay PetscFree(baij->rowners); 10390ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 10400ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 10410ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 10420ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 10430ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 10440ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 10450ac07820SSatish Balay PetscFree(baij); 10460ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 10470ac07820SSatish Balay PetscHeaderDestroy(B); 10480ac07820SSatish Balay } 10490ac07820SSatish Balay return 0; 10500ac07820SSatish Balay } 10510e95ebc0SSatish Balay 10525615d1e5SSatish Balay #undef __FUNC__ 10535615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 10540e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 10550e95ebc0SSatish Balay { 10560e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 10570e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 10580e95ebc0SSatish Balay int ierr,s1,s2,s3; 10590e95ebc0SSatish Balay 10600e95ebc0SSatish Balay if (ll) { 10610e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 10620e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1063e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 10640e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 10650e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 10660e95ebc0SSatish Balay } 1067e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 10680e95ebc0SSatish Balay return 0; 10690e95ebc0SSatish Balay } 10700e95ebc0SSatish Balay 10710ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 10720ac07820SSatish Balay matrix is square and the column and row owerships are identical. 10730ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 10740ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 10750ac07820SSatish Balay routine. 10760ac07820SSatish Balay */ 10775615d1e5SSatish Balay #undef __FUNC__ 10785615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 10790ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 10800ac07820SSatish Balay { 10810ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 10820ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 10830ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 10840ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 10850ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 10860ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 10870ac07820SSatish Balay MPI_Comm comm = A->comm; 10880ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 10890ac07820SSatish Balay MPI_Status recv_status,*send_status; 10900ac07820SSatish Balay IS istmp; 10910ac07820SSatish Balay 10920ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 10930ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 10940ac07820SSatish Balay 10950ac07820SSatish Balay /* first count number of contributors to each processor */ 10960ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 10970ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 10980ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 10990ac07820SSatish Balay for ( i=0; i<N; i++ ) { 11000ac07820SSatish Balay idx = rows[i]; 11010ac07820SSatish Balay found = 0; 11020ac07820SSatish Balay for ( j=0; j<size; j++ ) { 11030ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 11040ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 11050ac07820SSatish Balay } 11060ac07820SSatish Balay } 1107e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 11080ac07820SSatish Balay } 11090ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 11100ac07820SSatish Balay 11110ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 11120ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 11130ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 11140ac07820SSatish Balay nrecvs = work[rank]; 11150ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 11160ac07820SSatish Balay nmax = work[rank]; 11170ac07820SSatish Balay PetscFree(work); 11180ac07820SSatish Balay 11190ac07820SSatish Balay /* post receives: */ 11200ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 11210ac07820SSatish Balay CHKPTRQ(rvalues); 11220ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 11230ac07820SSatish Balay CHKPTRQ(recv_waits); 11240ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 11250ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 11260ac07820SSatish Balay } 11270ac07820SSatish Balay 11280ac07820SSatish Balay /* do sends: 11290ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 11300ac07820SSatish Balay the ith processor 11310ac07820SSatish Balay */ 11320ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 11330ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 11340ac07820SSatish Balay CHKPTRQ(send_waits); 11350ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 11360ac07820SSatish Balay starts[0] = 0; 11370ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 11380ac07820SSatish Balay for ( i=0; i<N; i++ ) { 11390ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 11400ac07820SSatish Balay } 11410ac07820SSatish Balay ISRestoreIndices(is,&rows); 11420ac07820SSatish Balay 11430ac07820SSatish Balay starts[0] = 0; 11440ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 11450ac07820SSatish Balay count = 0; 11460ac07820SSatish Balay for ( i=0; i<size; i++ ) { 11470ac07820SSatish Balay if (procs[i]) { 11480ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 11490ac07820SSatish Balay } 11500ac07820SSatish Balay } 11510ac07820SSatish Balay PetscFree(starts); 11520ac07820SSatish Balay 11530ac07820SSatish Balay base = owners[rank]*bs; 11540ac07820SSatish Balay 11550ac07820SSatish Balay /* wait on receives */ 11560ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 11570ac07820SSatish Balay source = lens + nrecvs; 11580ac07820SSatish Balay count = nrecvs; slen = 0; 11590ac07820SSatish Balay while (count) { 11600ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 11610ac07820SSatish Balay /* unpack receives into our local space */ 11620ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 11630ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 11640ac07820SSatish Balay lens[imdex] = n; 11650ac07820SSatish Balay slen += n; 11660ac07820SSatish Balay count--; 11670ac07820SSatish Balay } 11680ac07820SSatish Balay PetscFree(recv_waits); 11690ac07820SSatish Balay 11700ac07820SSatish Balay /* move the data into the send scatter */ 11710ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 11720ac07820SSatish Balay count = 0; 11730ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 11740ac07820SSatish Balay values = rvalues + i*nmax; 11750ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 11760ac07820SSatish Balay lrows[count++] = values[j] - base; 11770ac07820SSatish Balay } 11780ac07820SSatish Balay } 11790ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 11800ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 11810ac07820SSatish Balay 11820ac07820SSatish Balay /* actually zap the local rows */ 1183537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 11840ac07820SSatish Balay PLogObjectParent(A,istmp); 11850ac07820SSatish Balay PetscFree(lrows); 11860ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 11870ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 11880ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 11890ac07820SSatish Balay 11900ac07820SSatish Balay /* wait on sends */ 11910ac07820SSatish Balay if (nsends) { 11920ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 11930ac07820SSatish Balay CHKPTRQ(send_status); 11940ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 11950ac07820SSatish Balay PetscFree(send_status); 11960ac07820SSatish Balay } 11970ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 11980ac07820SSatish Balay 11990ac07820SSatish Balay return 0; 12000ac07820SSatish Balay } 1201ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 12025615d1e5SSatish Balay #undef __FUNC__ 12035615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1204ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A) 1205ba4ca20aSSatish Balay { 1206ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1207ba4ca20aSSatish Balay 1208ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1209ba4ca20aSSatish Balay else return 0; 1210ba4ca20aSSatish Balay } 12110ac07820SSatish Balay 12125615d1e5SSatish Balay #undef __FUNC__ 12135615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1214bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A) 1215bb5a7306SBarry Smith { 1216bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1217bb5a7306SBarry Smith int ierr; 1218bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1219bb5a7306SBarry Smith return 0; 1220bb5a7306SBarry Smith } 1221bb5a7306SBarry Smith 12220ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 12230ac07820SSatish Balay 122479bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 122579bdfe76SSatish Balay static struct _MatOps MatOps = { 1226bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 12274c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 12284c50302cSBarry Smith 0,0,0,0, 12290ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 12300e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 123158667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 12324c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 12334c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 12344c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1235905e6a2fSBarry Smith 0,0,0,MatConvertSameType_MPIBAIJ,0,0, 1236d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1237ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1238bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1239bb5a7306SBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ}; 124079bdfe76SSatish Balay 124179bdfe76SSatish Balay 12425615d1e5SSatish Balay #undef __FUNC__ 12435615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 124479bdfe76SSatish Balay /*@C 124579bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 124679bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 124779bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 124879bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 124979bdfe76SSatish Balay performance can be increased by more than a factor of 50. 125079bdfe76SSatish Balay 125179bdfe76SSatish Balay Input Parameters: 125279bdfe76SSatish Balay . comm - MPI communicator 125379bdfe76SSatish Balay . bs - size of blockk 125479bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 125592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 125692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 125792e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 125892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 125992e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 126079bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 126192e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 126279bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 126379bdfe76SSatish Balay submatrix (same for all local rows) 126492e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 126592e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 126692e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 126792e8d321SLois Curfman McInnes it is zero. 126892e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 126979bdfe76SSatish Balay submatrix (same for all local rows). 127092e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 127192e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 127292e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 127379bdfe76SSatish Balay 127479bdfe76SSatish Balay Output Parameter: 127579bdfe76SSatish Balay . A - the matrix 127679bdfe76SSatish Balay 127779bdfe76SSatish Balay Notes: 127879bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 127979bdfe76SSatish Balay (possibly both). 128079bdfe76SSatish Balay 128179bdfe76SSatish Balay Storage Information: 128279bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 128379bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 128479bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 128579bdfe76SSatish Balay local matrix (a rectangular submatrix). 128679bdfe76SSatish Balay 128779bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 128879bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 128979bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 129079bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 129179bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 129279bdfe76SSatish Balay 129379bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 129479bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 129579bdfe76SSatish Balay 129679bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 129779bdfe76SSatish Balay $ ------------------- 129879bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 129979bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 130079bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 130179bdfe76SSatish Balay $ ------------------- 130279bdfe76SSatish Balay $ 130379bdfe76SSatish Balay 130479bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 130579bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 130679bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 130757b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 130879bdfe76SSatish Balay 130979bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 131079bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 131179bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 131292e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 131392e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 13146da5968aSLois Curfman McInnes matrices. 131579bdfe76SSatish Balay 131692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 131779bdfe76SSatish Balay 131879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 131979bdfe76SSatish Balay @*/ 132079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 132179bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 132279bdfe76SSatish Balay { 132379bdfe76SSatish Balay Mat B; 132479bdfe76SSatish Balay Mat_MPIBAIJ *b; 13254c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 132679bdfe76SSatish Balay 1327e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 132879bdfe76SSatish Balay *A = 0; 132979bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 133079bdfe76SSatish Balay PLogObjectCreate(B); 133179bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 133279bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 133379bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 13344c50302cSBarry Smith MPI_Comm_size(comm,&size); 13354c50302cSBarry Smith if (size == 1) { 13364c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 13374c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 13384c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 13394c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 13404c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 13414c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 13424c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 13434c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 13444c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 13454c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 13464c50302cSBarry Smith } 13474c50302cSBarry Smith 134879bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 134979bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 135090f02eecSBarry Smith B->mapping = 0; 135179bdfe76SSatish Balay B->factor = 0; 135279bdfe76SSatish Balay B->assembled = PETSC_FALSE; 135379bdfe76SSatish Balay 135479bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 135579bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 135679bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 135779bdfe76SSatish Balay 135879bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1359e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1360e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1361e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1362cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1363cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 136479bdfe76SSatish Balay 136579bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 136679bdfe76SSatish Balay work[0] = m; work[1] = n; 136779bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 136879bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 136979bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 137079bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 137179bdfe76SSatish Balay } 137279bdfe76SSatish Balay if (m == PETSC_DECIDE) { 137379bdfe76SSatish Balay Mbs = M/bs; 1374e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 137579bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 137679bdfe76SSatish Balay m = mbs*bs; 137779bdfe76SSatish Balay } 137879bdfe76SSatish Balay if (n == PETSC_DECIDE) { 137979bdfe76SSatish Balay Nbs = N/bs; 1380e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 138179bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 138279bdfe76SSatish Balay n = nbs*bs; 138379bdfe76SSatish Balay } 1384e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 138579bdfe76SSatish Balay 138679bdfe76SSatish Balay b->m = m; B->m = m; 138779bdfe76SSatish Balay b->n = n; B->n = n; 138879bdfe76SSatish Balay b->N = N; B->N = N; 138979bdfe76SSatish Balay b->M = M; B->M = M; 139079bdfe76SSatish Balay b->bs = bs; 139179bdfe76SSatish Balay b->bs2 = bs*bs; 139279bdfe76SSatish Balay b->mbs = mbs; 139379bdfe76SSatish Balay b->nbs = nbs; 139479bdfe76SSatish Balay b->Mbs = Mbs; 139579bdfe76SSatish Balay b->Nbs = Nbs; 139679bdfe76SSatish Balay 139779bdfe76SSatish Balay /* build local table of row and column ownerships */ 139879bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 139979bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 14000ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 140179bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 140279bdfe76SSatish Balay b->rowners[0] = 0; 140379bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 140479bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 140579bdfe76SSatish Balay } 140679bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 140779bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 14084fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 14094fa0d573SSatish Balay b->rend_bs = b->rend * bs; 14104fa0d573SSatish Balay 141179bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 141279bdfe76SSatish Balay b->cowners[0] = 0; 141379bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 141479bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 141579bdfe76SSatish Balay } 141679bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 141779bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 14184fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 14194fa0d573SSatish Balay b->cend_bs = b->cend * bs; 142079bdfe76SSatish Balay 142179bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 142279bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 142379bdfe76SSatish Balay PLogObjectParent(B,b->A); 142479bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 142579bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 142679bdfe76SSatish Balay PLogObjectParent(B,b->B); 142779bdfe76SSatish Balay 142879bdfe76SSatish Balay /* build cache for off array entries formed */ 142979bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 143090f02eecSBarry Smith b->donotstash = 0; 143179bdfe76SSatish Balay b->colmap = 0; 143279bdfe76SSatish Balay b->garray = 0; 143379bdfe76SSatish Balay b->roworiented = 1; 143479bdfe76SSatish Balay 143579bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 143679bdfe76SSatish Balay b->lvec = 0; 143779bdfe76SSatish Balay b->Mvctx = 0; 143879bdfe76SSatish Balay 143979bdfe76SSatish Balay /* stuff for MatGetRow() */ 144079bdfe76SSatish Balay b->rowindices = 0; 144179bdfe76SSatish Balay b->rowvalues = 0; 144279bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 144379bdfe76SSatish Balay 144479bdfe76SSatish Balay *A = B; 144579bdfe76SSatish Balay return 0; 144679bdfe76SSatish Balay } 1447026e39d0SSatish Balay 14485615d1e5SSatish Balay #undef __FUNC__ 14495615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 14500ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 14510ac07820SSatish Balay { 14520ac07820SSatish Balay Mat mat; 14530ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 14540ac07820SSatish Balay int ierr, len=0, flg; 14550ac07820SSatish Balay 14560ac07820SSatish Balay *newmat = 0; 14570ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 14580ac07820SSatish Balay PLogObjectCreate(mat); 14590ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 14600ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 14610ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 14620ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 14630ac07820SSatish Balay mat->factor = matin->factor; 14640ac07820SSatish Balay mat->assembled = PETSC_TRUE; 14650ac07820SSatish Balay 14660ac07820SSatish Balay a->m = mat->m = oldmat->m; 14670ac07820SSatish Balay a->n = mat->n = oldmat->n; 14680ac07820SSatish Balay a->M = mat->M = oldmat->M; 14690ac07820SSatish Balay a->N = mat->N = oldmat->N; 14700ac07820SSatish Balay 14710ac07820SSatish Balay a->bs = oldmat->bs; 14720ac07820SSatish Balay a->bs2 = oldmat->bs2; 14730ac07820SSatish Balay a->mbs = oldmat->mbs; 14740ac07820SSatish Balay a->nbs = oldmat->nbs; 14750ac07820SSatish Balay a->Mbs = oldmat->Mbs; 14760ac07820SSatish Balay a->Nbs = oldmat->Nbs; 14770ac07820SSatish Balay 14780ac07820SSatish Balay a->rstart = oldmat->rstart; 14790ac07820SSatish Balay a->rend = oldmat->rend; 14800ac07820SSatish Balay a->cstart = oldmat->cstart; 14810ac07820SSatish Balay a->cend = oldmat->cend; 14820ac07820SSatish Balay a->size = oldmat->size; 14830ac07820SSatish Balay a->rank = oldmat->rank; 14840ac07820SSatish Balay a->insertmode = NOT_SET_VALUES; 14850ac07820SSatish Balay a->rowvalues = 0; 14860ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 14870ac07820SSatish Balay 14880ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 14890ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 14900ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 14910ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 14920ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 14930ac07820SSatish Balay if (oldmat->colmap) { 14940ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 14950ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 14960ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 14970ac07820SSatish Balay } else a->colmap = 0; 14980ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 14990ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 15000ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 15010ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 15020ac07820SSatish Balay } else a->garray = 0; 15030ac07820SSatish Balay 15040ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 15050ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 15060ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 15070ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 15080ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 15090ac07820SSatish Balay PLogObjectParent(mat,a->A); 15100ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 15110ac07820SSatish Balay PLogObjectParent(mat,a->B); 15120ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 15130ac07820SSatish Balay if (flg) { 15140ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 15150ac07820SSatish Balay } 15160ac07820SSatish Balay *newmat = mat; 15170ac07820SSatish Balay return 0; 15180ac07820SSatish Balay } 151957b952d6SSatish Balay 152057b952d6SSatish Balay #include "sys.h" 152157b952d6SSatish Balay 15225615d1e5SSatish Balay #undef __FUNC__ 15235615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 152457b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 152557b952d6SSatish Balay { 152657b952d6SSatish Balay Mat A; 152757b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 152857b952d6SSatish Balay Scalar *vals,*buf; 152957b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 153057b952d6SSatish Balay MPI_Status status; 1531cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 153257b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 153357b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 153457b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 153557b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 153657b952d6SSatish Balay 153757b952d6SSatish Balay 153857b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 153957b952d6SSatish Balay bs2 = bs*bs; 154057b952d6SSatish Balay 154157b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 154257b952d6SSatish Balay if (!rank) { 154357b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 154457b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1545e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 154657b952d6SSatish Balay } 154757b952d6SSatish Balay 154857b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 154957b952d6SSatish Balay M = header[1]; N = header[2]; 155057b952d6SSatish Balay 1551e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 155257b952d6SSatish Balay 155357b952d6SSatish Balay /* 155457b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 155557b952d6SSatish Balay divisible by the blocksize 155657b952d6SSatish Balay */ 155757b952d6SSatish Balay Mbs = M/bs; 155857b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 155957b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 156057b952d6SSatish Balay else Mbs++; 156157b952d6SSatish Balay if (extra_rows &&!rank) { 1562b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 156357b952d6SSatish Balay } 1564537820f0SBarry Smith 156557b952d6SSatish Balay /* determine ownership of all rows */ 156657b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 156757b952d6SSatish Balay m = mbs * bs; 1568cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1569cee3aa6bSSatish Balay browners = rowners + size + 1; 157057b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 157157b952d6SSatish Balay rowners[0] = 0; 1572cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1573cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 157457b952d6SSatish Balay rstart = rowners[rank]; 157557b952d6SSatish Balay rend = rowners[rank+1]; 157657b952d6SSatish Balay 157757b952d6SSatish Balay /* distribute row lengths to all processors */ 157857b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 157957b952d6SSatish Balay if (!rank) { 158057b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 158157b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 158257b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 158357b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1584cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1585cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 158657b952d6SSatish Balay PetscFree(sndcounts); 158757b952d6SSatish Balay } 158857b952d6SSatish Balay else { 158957b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 159057b952d6SSatish Balay } 159157b952d6SSatish Balay 159257b952d6SSatish Balay if (!rank) { 159357b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 159457b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 159557b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 159657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 159757b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 159857b952d6SSatish Balay procsnz[i] += rowlengths[j]; 159957b952d6SSatish Balay } 160057b952d6SSatish Balay } 160157b952d6SSatish Balay PetscFree(rowlengths); 160257b952d6SSatish Balay 160357b952d6SSatish Balay /* determine max buffer needed and allocate it */ 160457b952d6SSatish Balay maxnz = 0; 160557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 160657b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 160757b952d6SSatish Balay } 160857b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 160957b952d6SSatish Balay 161057b952d6SSatish Balay /* read in my part of the matrix column indices */ 161157b952d6SSatish Balay nz = procsnz[0]; 161257b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 161357b952d6SSatish Balay mycols = ibuf; 1614cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 161557b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1616cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1617cee3aa6bSSatish Balay 161857b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 161957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 162057b952d6SSatish Balay nz = procsnz[i]; 162157b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 162257b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 162357b952d6SSatish Balay } 162457b952d6SSatish Balay /* read in the stuff for the last proc */ 162557b952d6SSatish Balay if ( size != 1 ) { 162657b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 162757b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 162857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1629cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 163057b952d6SSatish Balay } 163157b952d6SSatish Balay PetscFree(cols); 163257b952d6SSatish Balay } 163357b952d6SSatish Balay else { 163457b952d6SSatish Balay /* determine buffer space needed for message */ 163557b952d6SSatish Balay nz = 0; 163657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 163757b952d6SSatish Balay nz += locrowlens[i]; 163857b952d6SSatish Balay } 163957b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 164057b952d6SSatish Balay mycols = ibuf; 164157b952d6SSatish Balay /* receive message of column indices*/ 164257b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 164357b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1644e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 164557b952d6SSatish Balay } 164657b952d6SSatish Balay 164757b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1648cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1649cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 165057b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1651cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 165257b952d6SSatish Balay masked1 = mask + Mbs; 165357b952d6SSatish Balay masked2 = masked1 + Mbs; 165457b952d6SSatish Balay rowcount = 0; nzcount = 0; 165557b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 165657b952d6SSatish Balay dcount = 0; 165757b952d6SSatish Balay odcount = 0; 165857b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 165957b952d6SSatish Balay kmax = locrowlens[rowcount]; 166057b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 166157b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 166257b952d6SSatish Balay if (!mask[tmp]) { 166357b952d6SSatish Balay mask[tmp] = 1; 166457b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 166557b952d6SSatish Balay else masked1[dcount++] = tmp; 166657b952d6SSatish Balay } 166757b952d6SSatish Balay } 166857b952d6SSatish Balay rowcount++; 166957b952d6SSatish Balay } 1670cee3aa6bSSatish Balay 167157b952d6SSatish Balay dlens[i] = dcount; 167257b952d6SSatish Balay odlens[i] = odcount; 1673cee3aa6bSSatish Balay 167457b952d6SSatish Balay /* zero out the mask elements we set */ 167557b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 167657b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 167757b952d6SSatish Balay } 1678cee3aa6bSSatish Balay 167957b952d6SSatish Balay /* create our matrix */ 1680537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1681537820f0SBarry Smith CHKERRQ(ierr); 168257b952d6SSatish Balay A = *newmat; 16836d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 168457b952d6SSatish Balay 168557b952d6SSatish Balay if (!rank) { 168657b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 168757b952d6SSatish Balay /* read in my part of the matrix numerical values */ 168857b952d6SSatish Balay nz = procsnz[0]; 168957b952d6SSatish Balay vals = buf; 1690cee3aa6bSSatish Balay mycols = ibuf; 1691cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 169257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1693cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1694537820f0SBarry Smith 169557b952d6SSatish Balay /* insert into matrix */ 169657b952d6SSatish Balay jj = rstart*bs; 169757b952d6SSatish Balay for ( i=0; i<m; i++ ) { 169857b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 169957b952d6SSatish Balay mycols += locrowlens[i]; 170057b952d6SSatish Balay vals += locrowlens[i]; 170157b952d6SSatish Balay jj++; 170257b952d6SSatish Balay } 170357b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 170457b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 170557b952d6SSatish Balay nz = procsnz[i]; 170657b952d6SSatish Balay vals = buf; 170757b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 170857b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 170957b952d6SSatish Balay } 171057b952d6SSatish Balay /* the last proc */ 171157b952d6SSatish Balay if ( size != 1 ){ 171257b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1713cee3aa6bSSatish Balay vals = buf; 171457b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 171557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1716cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 171757b952d6SSatish Balay } 171857b952d6SSatish Balay PetscFree(procsnz); 171957b952d6SSatish Balay } 172057b952d6SSatish Balay else { 172157b952d6SSatish Balay /* receive numeric values */ 172257b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 172357b952d6SSatish Balay 172457b952d6SSatish Balay /* receive message of values*/ 172557b952d6SSatish Balay vals = buf; 1726cee3aa6bSSatish Balay mycols = ibuf; 172757b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 172857b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1729e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 173057b952d6SSatish Balay 173157b952d6SSatish Balay /* insert into matrix */ 173257b952d6SSatish Balay jj = rstart*bs; 1733cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 173457b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 173557b952d6SSatish Balay mycols += locrowlens[i]; 173657b952d6SSatish Balay vals += locrowlens[i]; 173757b952d6SSatish Balay jj++; 173857b952d6SSatish Balay } 173957b952d6SSatish Balay } 174057b952d6SSatish Balay PetscFree(locrowlens); 174157b952d6SSatish Balay PetscFree(buf); 174257b952d6SSatish Balay PetscFree(ibuf); 174357b952d6SSatish Balay PetscFree(rowners); 174457b952d6SSatish Balay PetscFree(dlens); 1745cee3aa6bSSatish Balay PetscFree(mask); 17466d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 17476d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 174857b952d6SSatish Balay return 0; 174957b952d6SSatish Balay } 175057b952d6SSatish Balay 175157b952d6SSatish Balay 1752