1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*83e2fdc7SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.74 1997/07/28 16:13:24 bsmith Exp bsmith $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2257b952d6SSatish Balay 233b2fbd54SBarry Smith 24537820f0SBarry Smith /* 25537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2857b952d6SSatish Balay length of colmap equals the global matrix length. 2957b952d6SSatish Balay */ 305615d1e5SSatish Balay #undef __FUNC__ 315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3357b952d6SSatish Balay { 3457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3557b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 36928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3757b952d6SSatish Balay 38758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3957b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4057b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4257b952d6SSatish Balay return 0; 4357b952d6SSatish Balay } 4457b952d6SSatish Balay 455615d1e5SSatish Balay #undef __FUNC__ 465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 4957b952d6SSatish Balay { 503b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5157b952d6SSatish Balay int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 553b2fbd54SBarry Smith return 0; 563b2fbd54SBarry Smith } 573b2fbd54SBarry Smith 585615d1e5SSatish Balay #undef __FUNC__ 595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 613b2fbd54SBarry Smith PetscTruth *done) 623b2fbd54SBarry Smith { 633b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 643b2fbd54SBarry Smith int ierr; 653b2fbd54SBarry Smith if (aij->size == 1) { 663b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 67e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6857b952d6SSatish Balay return 0; 6957b952d6SSatish Balay } 7080c1aa95SSatish Balay #define CHUNKSIZE 10 7180c1aa95SSatish Balay 72f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 7380c1aa95SSatish Balay { \ 7480c1aa95SSatish Balay \ 7580c1aa95SSatish Balay brow = row/bs; \ 7680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 77ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 7880c1aa95SSatish Balay bcol = col/bs; \ 7980c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 80ab26458aSBarry Smith low = 0; high = nrow; \ 81ab26458aSBarry Smith while (high-low > 3) { \ 82ab26458aSBarry Smith t = (low+high)/2; \ 83ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 84ab26458aSBarry Smith else low = t; \ 85ab26458aSBarry Smith } \ 86ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 8780c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 8880c1aa95SSatish Balay if (rp[_i] == bcol) { \ 8980c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 90eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 91eada6651SSatish Balay else *bap = value; \ 92ac7a638eSSatish Balay goto a_noinsert; \ 9380c1aa95SSatish Balay } \ 9480c1aa95SSatish Balay } \ 9589280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 9611ebbc71SLois Curfman McInnes else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 9780c1aa95SSatish Balay if (nrow >= rmax) { \ 9880c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 9980c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 10080c1aa95SSatish Balay Scalar *new_a; \ 10180c1aa95SSatish Balay \ 10211ebbc71SLois Curfman McInnes if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 10389280ab3SLois Curfman McInnes \ 10480c1aa95SSatish Balay /* malloc new storage space */ \ 10580c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 10680c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 10780c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 10880c1aa95SSatish Balay new_i = new_j + new_nz; \ 10980c1aa95SSatish Balay \ 11080c1aa95SSatish Balay /* copy over old data into new slots */ \ 11180c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 11280c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 11380c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 11480c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 11580c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 11680c1aa95SSatish Balay len*sizeof(int)); \ 11780c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 11880c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 11980c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 12080c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 12180c1aa95SSatish Balay /* free up old matrix storage */ \ 12280c1aa95SSatish Balay PetscFree(a->a); \ 12380c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 12480c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 12580c1aa95SSatish Balay a->singlemalloc = 1; \ 12680c1aa95SSatish Balay \ 12780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 128ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 12980c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 13080c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 13180c1aa95SSatish Balay a->reallocs++; \ 13280c1aa95SSatish Balay a->nz++; \ 13380c1aa95SSatish Balay } \ 13480c1aa95SSatish Balay N = nrow++ - 1; \ 13580c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 13680c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 13780c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 13880c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 13980c1aa95SSatish Balay } \ 14080c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 14180c1aa95SSatish Balay rp[_i] = bcol; \ 14280c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 143ac7a638eSSatish Balay a_noinsert:; \ 14480c1aa95SSatish Balay ailen[brow] = nrow; \ 14580c1aa95SSatish Balay } 14657b952d6SSatish Balay 147ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 148ac7a638eSSatish Balay { \ 149ac7a638eSSatish Balay \ 150ac7a638eSSatish Balay brow = row/bs; \ 151ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 153ac7a638eSSatish Balay bcol = col/bs; \ 154ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 155ac7a638eSSatish Balay low = 0; high = nrow; \ 156ac7a638eSSatish Balay while (high-low > 3) { \ 157ac7a638eSSatish Balay t = (low+high)/2; \ 158ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 159ac7a638eSSatish Balay else low = t; \ 160ac7a638eSSatish Balay } \ 161ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 162ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 163ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 164ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 165ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 166ac7a638eSSatish Balay else *bap = value; \ 167ac7a638eSSatish Balay goto b_noinsert; \ 168ac7a638eSSatish Balay } \ 169ac7a638eSSatish Balay } \ 17089280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 17111ebbc71SLois Curfman McInnes else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 172ac7a638eSSatish Balay if (nrow >= rmax) { \ 173ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 17474c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 175ac7a638eSSatish Balay Scalar *new_a; \ 176ac7a638eSSatish Balay \ 17711ebbc71SLois Curfman McInnes if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 17889280ab3SLois Curfman McInnes \ 179ac7a638eSSatish Balay /* malloc new storage space */ \ 18074c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 181ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 182ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 183ac7a638eSSatish Balay new_i = new_j + new_nz; \ 184ac7a638eSSatish Balay \ 185ac7a638eSSatish Balay /* copy over old data into new slots */ \ 186ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 18774c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 188ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 189ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 190ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 191ac7a638eSSatish Balay len*sizeof(int)); \ 192ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 193ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 194ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 195ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 196ac7a638eSSatish Balay /* free up old matrix storage */ \ 19774c639caSSatish Balay PetscFree(b->a); \ 19874c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 19974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 20074c639caSSatish Balay b->singlemalloc = 1; \ 201ac7a638eSSatish Balay \ 202ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 203ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 20474c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 20574c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 20674c639caSSatish Balay b->reallocs++; \ 20774c639caSSatish Balay b->nz++; \ 208ac7a638eSSatish Balay } \ 209ac7a638eSSatish Balay N = nrow++ - 1; \ 210ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 211ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 212ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 213ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 214ac7a638eSSatish Balay } \ 215ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 216ac7a638eSSatish Balay rp[_i] = bcol; \ 217ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 218ac7a638eSSatish Balay b_noinsert:; \ 219ac7a638eSSatish Balay bilen[brow] = nrow; \ 220ac7a638eSSatish Balay } 221ac7a638eSSatish Balay 2225615d1e5SSatish Balay #undef __FUNC__ 2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 22557b952d6SSatish Balay { 22657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22757b952d6SSatish Balay Scalar value; 2284fa0d573SSatish Balay int ierr,i,j,row,col; 2294fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2304fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2314fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 23257b952d6SSatish Balay 233eada6651SSatish Balay /* Some Variables required in the macro */ 23480c1aa95SSatish Balay Mat A = baij->A; 23580c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 236ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 237ac7a638eSSatish Balay Scalar *aa=a->a; 238ac7a638eSSatish Balay 239ac7a638eSSatish Balay Mat B = baij->B; 240ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 241ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 242ac7a638eSSatish Balay Scalar *ba=b->a; 243ac7a638eSSatish Balay 244ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 245ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 246ac7a638eSSatish Balay Scalar *ap,*bap; 24780c1aa95SSatish Balay 24857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 250e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 251e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 252639f9d9dSBarry Smith #endif 25357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 25457b952d6SSatish Balay row = im[i] - rstart_orig; 25557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 25657b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 25757b952d6SSatish Balay col = in[j] - cstart_orig; 25857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 26080c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 26157b952d6SSatish Balay } 262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 263e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 264e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 265639f9d9dSBarry Smith #endif 26657b952d6SSatish Balay else { 26757b952d6SSatish Balay if (mat->was_assembled) { 268905e6a2fSBarry Smith if (!baij->colmap) { 269905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 270905e6a2fSBarry Smith } 271905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 27257b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 27357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 27457b952d6SSatish Balay col = in[j]; 2759bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2769bf004c3SSatish Balay B = baij->B; 2779bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2789bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2799bf004c3SSatish Balay ba=b->a; 28057b952d6SSatish Balay } 28157b952d6SSatish Balay } 28257b952d6SSatish Balay else col = in[j]; 28357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 284ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 285ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 28657b952d6SSatish Balay } 28757b952d6SSatish Balay } 28857b952d6SSatish Balay } 28957b952d6SSatish Balay else { 29090f02eecSBarry Smith if (roworiented && !baij->donotstash) { 29157b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 29257b952d6SSatish Balay } 29357b952d6SSatish Balay else { 29490f02eecSBarry Smith if (!baij->donotstash) { 29557b952d6SSatish Balay row = im[i]; 29657b952d6SSatish Balay for ( j=0; j<n; j++ ) { 29757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 29857b952d6SSatish Balay } 29957b952d6SSatish Balay } 30057b952d6SSatish Balay } 30157b952d6SSatish Balay } 30290f02eecSBarry Smith } 30357b952d6SSatish Balay return 0; 30457b952d6SSatish Balay } 30557b952d6SSatish Balay 306ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 307ab26458aSBarry Smith #undef __FUNC__ 308ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 309ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 310ab26458aSBarry Smith { 311ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 31230793edcSSatish Balay Scalar *value,*barray=baij->barray; 313abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 314ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 315ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 316ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 317ab26458aSBarry Smith 31830793edcSSatish Balay 31930793edcSSatish Balay if(!barray) { 32047513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 32130793edcSSatish Balay } 32230793edcSSatish Balay 323ab26458aSBarry Smith if (roworiented) { 324ab26458aSBarry Smith stepval = (n-1)*bs; 325ab26458aSBarry Smith } else { 326ab26458aSBarry Smith stepval = (m-1)*bs; 327ab26458aSBarry Smith } 328ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 329ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 330ab26458aSBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 331ab26458aSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 332ab26458aSBarry Smith #endif 333ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 334ab26458aSBarry Smith row = im[i] - rstart; 335ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 33615b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 33715b57d14SSatish Balay if ((roworiented) && (n == 1)) { 33815b57d14SSatish Balay barray = v + i*bs2; 33915b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 34015b57d14SSatish Balay barray = v + j*bs2; 34115b57d14SSatish Balay } else { /* Here a copy is required */ 342ab26458aSBarry Smith if (roworiented) { 343ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 344ab26458aSBarry Smith } else { 345ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 346abef11f7SSatish Balay } 34747513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 34847513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 34930793edcSSatish Balay *barray++ = *value++; 35047513183SBarry Smith } 35147513183SBarry Smith } 35230793edcSSatish Balay barray -=bs2; 35315b57d14SSatish Balay } 354abef11f7SSatish Balay 355abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 356abef11f7SSatish Balay col = in[j] - cstart; 35730793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 358ab26458aSBarry Smith } 359ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 360ab26458aSBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 36147513183SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Column too large");} 362ab26458aSBarry Smith #endif 363ab26458aSBarry Smith else { 364ab26458aSBarry Smith if (mat->was_assembled) { 365ab26458aSBarry Smith if (!baij->colmap) { 366ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 367ab26458aSBarry Smith } 368a5eb4965SSatish Balay 369a5eb4965SSatish Balay #if defined(PETSC_BOPT_g) 370a5eb4965SSatish Balay if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(1,0,"Incorrect colmap");} 371a5eb4965SSatish Balay #endif 372a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 373ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 374ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 375ab26458aSBarry Smith col = in[j]; 376ab26458aSBarry Smith } 377ab26458aSBarry Smith } 378ab26458aSBarry Smith else col = in[j]; 37930793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 380ab26458aSBarry Smith } 381ab26458aSBarry Smith } 382ab26458aSBarry Smith } 383ab26458aSBarry Smith else { 384ab26458aSBarry Smith if (!baij->donotstash) { 385ab26458aSBarry Smith if (roworiented ) { 386abef11f7SSatish Balay row = im[i]*bs; 387abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 388abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 389abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 390abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 391abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 392abef11f7SSatish Balay } 393ab26458aSBarry Smith } 394ab26458aSBarry Smith } 395ab26458aSBarry Smith } 396ab26458aSBarry Smith else { 397ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 398abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 399abef11f7SSatish Balay col = in[j]*bs; 400abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 401abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 402abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 403ab26458aSBarry Smith } 404ab26458aSBarry Smith } 405ab26458aSBarry Smith } 406abef11f7SSatish Balay } 407abef11f7SSatish Balay } 408ab26458aSBarry Smith } 409ab26458aSBarry Smith } 410ab26458aSBarry Smith return 0; 411ab26458aSBarry Smith } 412ab26458aSBarry Smith 4135615d1e5SSatish Balay #undef __FUNC__ 4145615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 415ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 416d6de1c52SSatish Balay { 417d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 418d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 419d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 420d6de1c52SSatish Balay 421d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 422e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 423e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 424d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 425d6de1c52SSatish Balay row = idxm[i] - bsrstart; 426d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 427e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 428e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 429d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 430d6de1c52SSatish Balay col = idxn[j] - bscstart; 431d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 432d6de1c52SSatish Balay } 433d6de1c52SSatish Balay else { 434905e6a2fSBarry Smith if (!baij->colmap) { 435905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 436905e6a2fSBarry Smith } 437e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 438dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 439d9d09a02SSatish Balay else { 440dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 441d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 442d6de1c52SSatish Balay } 443d6de1c52SSatish Balay } 444d6de1c52SSatish Balay } 445d9d09a02SSatish Balay } 446d6de1c52SSatish Balay else { 447e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 448d6de1c52SSatish Balay } 449d6de1c52SSatish Balay } 450d6de1c52SSatish Balay return 0; 451d6de1c52SSatish Balay } 452d6de1c52SSatish Balay 4535615d1e5SSatish Balay #undef __FUNC__ 4545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 455ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 456d6de1c52SSatish Balay { 457d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 458d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 459acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 460d6de1c52SSatish Balay double sum = 0.0; 461d6de1c52SSatish Balay Scalar *v; 462d6de1c52SSatish Balay 463d6de1c52SSatish Balay if (baij->size == 1) { 464d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 465d6de1c52SSatish Balay } else { 466d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 467d6de1c52SSatish Balay v = amat->a; 468d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 469d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 470d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 471d6de1c52SSatish Balay #else 472d6de1c52SSatish Balay sum += (*v)*(*v); v++; 473d6de1c52SSatish Balay #endif 474d6de1c52SSatish Balay } 475d6de1c52SSatish Balay v = bmat->a; 476d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 477d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 478d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 479d6de1c52SSatish Balay #else 480d6de1c52SSatish Balay sum += (*v)*(*v); v++; 481d6de1c52SSatish Balay #endif 482d6de1c52SSatish Balay } 483d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 484d6de1c52SSatish Balay *norm = sqrt(*norm); 485d6de1c52SSatish Balay } 486acdf5bf4SSatish Balay else 487e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 488d6de1c52SSatish Balay } 489d6de1c52SSatish Balay return 0; 490d6de1c52SSatish Balay } 49157b952d6SSatish Balay 4925615d1e5SSatish Balay #undef __FUNC__ 4935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 494ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 49557b952d6SSatish Balay { 49657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 49757b952d6SSatish Balay MPI_Comm comm = mat->comm; 49857b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 49957b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 50057b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 50157b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 50257b952d6SSatish Balay InsertMode addv; 50357b952d6SSatish Balay Scalar *rvalues,*svalues; 50457b952d6SSatish Balay 50557b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 506e0fa3b82SLois Curfman McInnes MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 50757b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 508e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 50957b952d6SSatish Balay } 510e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 51157b952d6SSatish Balay 51257b952d6SSatish Balay /* first count number of contributors to each processor */ 51357b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 51457b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 51557b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 51657b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 51757b952d6SSatish Balay idx = baij->stash.idx[i]; 51857b952d6SSatish Balay for ( j=0; j<size; j++ ) { 51957b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 52057b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 52157b952d6SSatish Balay } 52257b952d6SSatish Balay } 52357b952d6SSatish Balay } 52457b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 52557b952d6SSatish Balay 52657b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 52757b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 52857b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 52957b952d6SSatish Balay nreceives = work[rank]; 53057b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 53157b952d6SSatish Balay nmax = work[rank]; 53257b952d6SSatish Balay PetscFree(work); 53357b952d6SSatish Balay 53457b952d6SSatish Balay /* post receives: 53557b952d6SSatish Balay 1) each message will consist of ordered pairs 53657b952d6SSatish Balay (global index,value) we store the global index as a double 53757b952d6SSatish Balay to simplify the message passing. 53857b952d6SSatish Balay 2) since we don't know how long each individual message is we 53957b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 54057b952d6SSatish Balay this is a lot of wasted space. 54157b952d6SSatish Balay 54257b952d6SSatish Balay 54357b952d6SSatish Balay This could be done better. 54457b952d6SSatish Balay */ 54557b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 54657b952d6SSatish Balay CHKPTRQ(rvalues); 54757b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 54857b952d6SSatish Balay CHKPTRQ(recv_waits); 54957b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 55057b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 55157b952d6SSatish Balay comm,recv_waits+i); 55257b952d6SSatish Balay } 55357b952d6SSatish Balay 55457b952d6SSatish Balay /* do sends: 55557b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 55657b952d6SSatish Balay the ith processor 55757b952d6SSatish Balay */ 55857b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 55957b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 56057b952d6SSatish Balay CHKPTRQ(send_waits); 56157b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 56257b952d6SSatish Balay starts[0] = 0; 56357b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 56457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 56557b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 56657b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 56757b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 56857b952d6SSatish Balay } 56957b952d6SSatish Balay PetscFree(owner); 57057b952d6SSatish Balay starts[0] = 0; 57157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 57257b952d6SSatish Balay count = 0; 57357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 57457b952d6SSatish Balay if (procs[i]) { 57557b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 57657b952d6SSatish Balay comm,send_waits+count++); 57757b952d6SSatish Balay } 57857b952d6SSatish Balay } 57957b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 58057b952d6SSatish Balay 58157b952d6SSatish Balay /* Free cache space */ 582d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 58357b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 58457b952d6SSatish Balay 58557b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 58657b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 58757b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 58857b952d6SSatish Balay baij->rmax = nmax; 58957b952d6SSatish Balay 59057b952d6SSatish Balay return 0; 59157b952d6SSatish Balay } 59257b952d6SSatish Balay 59357b952d6SSatish Balay 5945615d1e5SSatish Balay #undef __FUNC__ 5955615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 596ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 59757b952d6SSatish Balay { 59857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 59957b952d6SSatish Balay MPI_Status *send_status,recv_status; 60057b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 60157b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 60257b952d6SSatish Balay Scalar *values,val; 603e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 60457b952d6SSatish Balay 60557b952d6SSatish Balay /* wait on receives */ 60657b952d6SSatish Balay while (count) { 60757b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 60857b952d6SSatish Balay /* unpack receives into our local space */ 60957b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 61057b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 61157b952d6SSatish Balay n = n/3; 61257b952d6SSatish Balay for ( i=0; i<n; i++ ) { 61357b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 61457b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 61557b952d6SSatish Balay val = values[3*i+2]; 61657b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 61757b952d6SSatish Balay col -= baij->cstart*bs; 6186fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 61957b952d6SSatish Balay } 62057b952d6SSatish Balay else { 62157b952d6SSatish Balay if (mat->was_assembled) { 622905e6a2fSBarry Smith if (!baij->colmap) { 623905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 624905e6a2fSBarry Smith } 625a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 62657b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 62757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 62857b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 62957b952d6SSatish Balay } 63057b952d6SSatish Balay } 6316fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 63257b952d6SSatish Balay } 63357b952d6SSatish Balay } 63457b952d6SSatish Balay count--; 63557b952d6SSatish Balay } 63657b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 63757b952d6SSatish Balay 63857b952d6SSatish Balay /* wait on sends */ 63957b952d6SSatish Balay if (baij->nsends) { 64057b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 64157b952d6SSatish Balay CHKPTRQ(send_status); 64257b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 64357b952d6SSatish Balay PetscFree(send_status); 64457b952d6SSatish Balay } 64557b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 64657b952d6SSatish Balay 64757b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 64857b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 64957b952d6SSatish Balay 65057b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 65157b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 65257b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 65357b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 65457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 65557b952d6SSatish Balay } 65657b952d6SSatish Balay 6576d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 65857b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 65957b952d6SSatish Balay } 66057b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 66157b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 66257b952d6SSatish Balay 66357b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 66457b952d6SSatish Balay return 0; 66557b952d6SSatish Balay } 66657b952d6SSatish Balay 6675615d1e5SSatish Balay #undef __FUNC__ 6685615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 66957b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 67057b952d6SSatish Balay { 67157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 67257b952d6SSatish Balay int ierr; 67357b952d6SSatish Balay 67457b952d6SSatish Balay if (baij->size == 1) { 67557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 67657b952d6SSatish Balay } 677e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 67857b952d6SSatish Balay return 0; 67957b952d6SSatish Balay } 68057b952d6SSatish Balay 6815615d1e5SSatish Balay #undef __FUNC__ 6825615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 68357b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 68457b952d6SSatish Balay { 68557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 686cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 68757b952d6SSatish Balay FILE *fd; 68857b952d6SSatish Balay ViewerType vtype; 68957b952d6SSatish Balay 69057b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 69157b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 69257b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 693639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 6944e220ebcSLois Curfman McInnes MatInfo info; 69557b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 69657b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 6974e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 69857b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 69957b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 7004e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 7014e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 7024e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 7034e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 7044e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 7054e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 70657b952d6SSatish Balay fflush(fd); 70757b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 70857b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 70957b952d6SSatish Balay return 0; 71057b952d6SSatish Balay } 711639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 712bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 71357b952d6SSatish Balay return 0; 71457b952d6SSatish Balay } 71557b952d6SSatish Balay } 71657b952d6SSatish Balay 71757b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 71857b952d6SSatish Balay Draw draw; 71957b952d6SSatish Balay PetscTruth isnull; 72057b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 72157b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 72257b952d6SSatish Balay } 72357b952d6SSatish Balay 72457b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 72557b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 72657b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 72757b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 72857b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 72957b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 73057b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 73157b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 73257b952d6SSatish Balay fflush(fd); 73357b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 73457b952d6SSatish Balay } 73557b952d6SSatish Balay else { 73657b952d6SSatish Balay int size = baij->size; 73757b952d6SSatish Balay rank = baij->rank; 73857b952d6SSatish Balay if (size == 1) { 73957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 74057b952d6SSatish Balay } 74157b952d6SSatish Balay else { 74257b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 74357b952d6SSatish Balay Mat A; 74457b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 74557b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 74657b952d6SSatish Balay int mbs=baij->mbs; 74757b952d6SSatish Balay Scalar *a; 74857b952d6SSatish Balay 74957b952d6SSatish Balay if (!rank) { 750cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 75157b952d6SSatish Balay CHKERRQ(ierr); 75257b952d6SSatish Balay } 75357b952d6SSatish Balay else { 754cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 75557b952d6SSatish Balay CHKERRQ(ierr); 75657b952d6SSatish Balay } 75757b952d6SSatish Balay PLogObjectParent(mat,A); 75857b952d6SSatish Balay 75957b952d6SSatish Balay /* copy over the A part */ 76057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 76157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 76257b952d6SSatish Balay row = baij->rstart; 76357b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 76457b952d6SSatish Balay 76557b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 76657b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 76757b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 76857b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 76957b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 77057b952d6SSatish Balay for (k=0; k<bs; k++ ) { 771cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 772cee3aa6bSSatish Balay col++; a += bs; 77357b952d6SSatish Balay } 77457b952d6SSatish Balay } 77557b952d6SSatish Balay } 77657b952d6SSatish Balay /* copy over the B part */ 77757b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 77857b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 77957b952d6SSatish Balay row = baij->rstart*bs; 78057b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 78157b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 78257b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 78357b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 78457b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 78557b952d6SSatish Balay for (k=0; k<bs; k++ ) { 786cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 787cee3aa6bSSatish Balay col++; a += bs; 78857b952d6SSatish Balay } 78957b952d6SSatish Balay } 79057b952d6SSatish Balay } 79157b952d6SSatish Balay PetscFree(rvals); 7926d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7936d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 79457b952d6SSatish Balay if (!rank) { 79557b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 79657b952d6SSatish Balay } 79757b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 79857b952d6SSatish Balay } 79957b952d6SSatish Balay } 80057b952d6SSatish Balay return 0; 80157b952d6SSatish Balay } 80257b952d6SSatish Balay 80357b952d6SSatish Balay 80457b952d6SSatish Balay 8055615d1e5SSatish Balay #undef __FUNC__ 8065615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 807ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 80857b952d6SSatish Balay { 80957b952d6SSatish Balay Mat mat = (Mat) obj; 81057b952d6SSatish Balay int ierr; 81157b952d6SSatish Balay ViewerType vtype; 81257b952d6SSatish Balay 81357b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 81457b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 81557b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 81657b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 81757b952d6SSatish Balay } 81857b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 81957b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 82057b952d6SSatish Balay } 82157b952d6SSatish Balay return 0; 82257b952d6SSatish Balay } 82357b952d6SSatish Balay 8245615d1e5SSatish Balay #undef __FUNC__ 8255615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 826ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 82779bdfe76SSatish Balay { 82879bdfe76SSatish Balay Mat mat = (Mat) obj; 82979bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 83079bdfe76SSatish Balay int ierr; 83179bdfe76SSatish Balay 83279bdfe76SSatish Balay #if defined(PETSC_LOG) 83379bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 83479bdfe76SSatish Balay #endif 83579bdfe76SSatish Balay 836*83e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 83779bdfe76SSatish Balay PetscFree(baij->rowners); 83879bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 83979bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 84079bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 84179bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 84279bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 84379bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 84479bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 84530793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 84679bdfe76SSatish Balay PetscFree(baij); 84790f02eecSBarry Smith if (mat->mapping) { 84890f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 84990f02eecSBarry Smith } 85079bdfe76SSatish Balay PLogObjectDestroy(mat); 85179bdfe76SSatish Balay PetscHeaderDestroy(mat); 85279bdfe76SSatish Balay return 0; 85379bdfe76SSatish Balay } 85479bdfe76SSatish Balay 8555615d1e5SSatish Balay #undef __FUNC__ 8565615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 857ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 858cee3aa6bSSatish Balay { 859cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 86047b4a8eaSLois Curfman McInnes int ierr, nt; 861cee3aa6bSSatish Balay 862c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 86347b4a8eaSLois Curfman McInnes if (nt != a->n) { 864ab26458aSBarry Smith SETERRQ(1,0,"Incompatible partition of A and xx"); 86547b4a8eaSLois Curfman McInnes } 866c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 86747b4a8eaSLois Curfman McInnes if (nt != a->m) { 868e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 86947b4a8eaSLois Curfman McInnes } 87043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 871cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 87243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 873cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 87443a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 875cee3aa6bSSatish Balay return 0; 876cee3aa6bSSatish Balay } 877cee3aa6bSSatish Balay 8785615d1e5SSatish Balay #undef __FUNC__ 8795615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 880ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 881cee3aa6bSSatish Balay { 882cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 883cee3aa6bSSatish Balay int ierr; 88443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 885cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 88643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 887cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 888cee3aa6bSSatish Balay return 0; 889cee3aa6bSSatish Balay } 890cee3aa6bSSatish Balay 8915615d1e5SSatish Balay #undef __FUNC__ 8925615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 893ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 894cee3aa6bSSatish Balay { 895cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 896cee3aa6bSSatish Balay int ierr; 897cee3aa6bSSatish Balay 898cee3aa6bSSatish Balay /* do nondiagonal part */ 899cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 900cee3aa6bSSatish Balay /* send it on its way */ 901537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 902cee3aa6bSSatish Balay /* do local part */ 903cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 904cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 905cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 906cee3aa6bSSatish Balay /* but is not perhaps always true. */ 907639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 908cee3aa6bSSatish Balay return 0; 909cee3aa6bSSatish Balay } 910cee3aa6bSSatish Balay 9115615d1e5SSatish Balay #undef __FUNC__ 9125615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 913ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 914cee3aa6bSSatish Balay { 915cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 916cee3aa6bSSatish Balay int ierr; 917cee3aa6bSSatish Balay 918cee3aa6bSSatish Balay /* do nondiagonal part */ 919cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 920cee3aa6bSSatish Balay /* send it on its way */ 921537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 922cee3aa6bSSatish Balay /* do local part */ 923cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 924cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 925cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 926cee3aa6bSSatish Balay /* but is not perhaps always true. */ 927537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 928cee3aa6bSSatish Balay return 0; 929cee3aa6bSSatish Balay } 930cee3aa6bSSatish Balay 931cee3aa6bSSatish Balay /* 932cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 933cee3aa6bSSatish Balay diagonal block 934cee3aa6bSSatish Balay */ 9355615d1e5SSatish Balay #undef __FUNC__ 9365615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 937ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 938cee3aa6bSSatish Balay { 939cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 940cee3aa6bSSatish Balay if (a->M != a->N) 941e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 942cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 943cee3aa6bSSatish Balay } 944cee3aa6bSSatish Balay 9455615d1e5SSatish Balay #undef __FUNC__ 9465615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 947ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 948cee3aa6bSSatish Balay { 949cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 950cee3aa6bSSatish Balay int ierr; 951cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 952cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 953cee3aa6bSSatish Balay return 0; 954cee3aa6bSSatish Balay } 955026e39d0SSatish Balay 9565615d1e5SSatish Balay #undef __FUNC__ 9575615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 958ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 95957b952d6SSatish Balay { 96057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 96157b952d6SSatish Balay *m = mat->M; *n = mat->N; 96257b952d6SSatish Balay return 0; 96357b952d6SSatish Balay } 96457b952d6SSatish Balay 9655615d1e5SSatish Balay #undef __FUNC__ 9665615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 967ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 96857b952d6SSatish Balay { 96957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 97057b952d6SSatish Balay *m = mat->m; *n = mat->N; 97157b952d6SSatish Balay return 0; 97257b952d6SSatish Balay } 97357b952d6SSatish Balay 9745615d1e5SSatish Balay #undef __FUNC__ 9755615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 976ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 97757b952d6SSatish Balay { 97857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 97957b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 98057b952d6SSatish Balay return 0; 98157b952d6SSatish Balay } 98257b952d6SSatish Balay 983acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 984acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 985acdf5bf4SSatish Balay 9865615d1e5SSatish Balay #undef __FUNC__ 9875615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 988acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 989acdf5bf4SSatish Balay { 990acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 991acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 992acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 993d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 994d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 995acdf5bf4SSatish Balay 996e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 997acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 998acdf5bf4SSatish Balay 999acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1000acdf5bf4SSatish Balay /* 1001acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1002acdf5bf4SSatish Balay */ 1003acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1004bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1005bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1006acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1007acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1008acdf5bf4SSatish Balay } 1009acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1010acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1011acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1012acdf5bf4SSatish Balay } 1013acdf5bf4SSatish Balay 1014acdf5bf4SSatish Balay 1015e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 1016d9d09a02SSatish Balay lrow = row - brstart; 1017acdf5bf4SSatish Balay 1018acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1019acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1020acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1021acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1022acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1023acdf5bf4SSatish Balay nztot = nzA + nzB; 1024acdf5bf4SSatish Balay 1025acdf5bf4SSatish Balay cmap = mat->garray; 1026acdf5bf4SSatish Balay if (v || idx) { 1027acdf5bf4SSatish Balay if (nztot) { 1028acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1029acdf5bf4SSatish Balay int imark = -1; 1030acdf5bf4SSatish Balay if (v) { 1031acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1032acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1033d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1034acdf5bf4SSatish Balay else break; 1035acdf5bf4SSatish Balay } 1036acdf5bf4SSatish Balay imark = i; 1037acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1038acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1039acdf5bf4SSatish Balay } 1040acdf5bf4SSatish Balay if (idx) { 1041acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1042acdf5bf4SSatish Balay if (imark > -1) { 1043acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1044bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1045acdf5bf4SSatish Balay } 1046acdf5bf4SSatish Balay } else { 1047acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1048d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1049d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1050acdf5bf4SSatish Balay else break; 1051acdf5bf4SSatish Balay } 1052acdf5bf4SSatish Balay imark = i; 1053acdf5bf4SSatish Balay } 1054d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1055d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1056acdf5bf4SSatish Balay } 1057acdf5bf4SSatish Balay } 1058d212a18eSSatish Balay else { 1059d212a18eSSatish Balay if (idx) *idx = 0; 1060d212a18eSSatish Balay if (v) *v = 0; 1061d212a18eSSatish Balay } 1062acdf5bf4SSatish Balay } 1063acdf5bf4SSatish Balay *nz = nztot; 1064acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1065acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1066acdf5bf4SSatish Balay return 0; 1067acdf5bf4SSatish Balay } 1068acdf5bf4SSatish Balay 10695615d1e5SSatish Balay #undef __FUNC__ 10705615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1071acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1072acdf5bf4SSatish Balay { 1073acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1074acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1075e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 1076acdf5bf4SSatish Balay } 1077acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 1078acdf5bf4SSatish Balay return 0; 1079acdf5bf4SSatish Balay } 1080acdf5bf4SSatish Balay 10815615d1e5SSatish Balay #undef __FUNC__ 10825615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1083ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 10845a838052SSatish Balay { 10855a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 10865a838052SSatish Balay *bs = baij->bs; 10875a838052SSatish Balay return 0; 10885a838052SSatish Balay } 10895a838052SSatish Balay 10905615d1e5SSatish Balay #undef __FUNC__ 10915615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1092ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 109358667388SSatish Balay { 109458667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 109558667388SSatish Balay int ierr; 109658667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 109758667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 109858667388SSatish Balay return 0; 109958667388SSatish Balay } 11000ac07820SSatish Balay 11015615d1e5SSatish Balay #undef __FUNC__ 11025615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1103ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 11040ac07820SSatish Balay { 11054e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 11064e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 11077d57db60SLois Curfman McInnes int ierr; 11087d57db60SLois Curfman McInnes double isend[5], irecv[5]; 11090ac07820SSatish Balay 11104e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 11114e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 11124e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 11134e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 11144e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 11154e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 11164e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 11174e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 11184e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 11190ac07820SSatish Balay if (flag == MAT_LOCAL) { 11204e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 11214e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 11224e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11234e220ebcSLois Curfman McInnes info->memory = isend[3]; 11244e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 11250ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1126dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 11274e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11284e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11294e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11304e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11314e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11320ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1133dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 11344e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11354e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11364e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11374e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11384e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11390ac07820SSatish Balay } 11404e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 11414e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 11424e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 11430ac07820SSatish Balay return 0; 11440ac07820SSatish Balay } 11450ac07820SSatish Balay 11465615d1e5SSatish Balay #undef __FUNC__ 11475615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1148ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 114958667388SSatish Balay { 115058667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 115158667388SSatish Balay 115258667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 115358667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 11546da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1155c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 115696854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1157c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1158b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1159b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1160b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1161aeafbbfcSLois Curfman McInnes a->roworiented = 1; 116258667388SSatish Balay MatSetOption(a->A,op); 116358667388SSatish Balay MatSetOption(a->B,op); 1164b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 11656da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 116658667388SSatish Balay op == MAT_SYMMETRIC || 116758667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 116858667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 116958667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 117058667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 117158667388SSatish Balay a->roworiented = 0; 117258667388SSatish Balay MatSetOption(a->A,op); 117358667388SSatish Balay MatSetOption(a->B,op); 11742b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 117590f02eecSBarry Smith a->donotstash = 1; 117690f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1177e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 117858667388SSatish Balay else 1179e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 118058667388SSatish Balay return 0; 118158667388SSatish Balay } 118258667388SSatish Balay 11835615d1e5SSatish Balay #undef __FUNC__ 11845615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1185ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 11860ac07820SSatish Balay { 11870ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 11880ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 11890ac07820SSatish Balay Mat B; 11900ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 11910ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 11920ac07820SSatish Balay Scalar *a; 11930ac07820SSatish Balay 11940ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 1195e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 11960ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 11970ac07820SSatish Balay CHKERRQ(ierr); 11980ac07820SSatish Balay 11990ac07820SSatish Balay /* copy over the A part */ 12000ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 12010ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12020ac07820SSatish Balay row = baij->rstart; 12030ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 12040ac07820SSatish Balay 12050ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12060ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12070ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12080ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12090ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 12100ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12110ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12120ac07820SSatish Balay col++; a += bs; 12130ac07820SSatish Balay } 12140ac07820SSatish Balay } 12150ac07820SSatish Balay } 12160ac07820SSatish Balay /* copy over the B part */ 12170ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 12180ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12190ac07820SSatish Balay row = baij->rstart*bs; 12200ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12210ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12220ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12230ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12240ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 12250ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12260ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12270ac07820SSatish Balay col++; a += bs; 12280ac07820SSatish Balay } 12290ac07820SSatish Balay } 12300ac07820SSatish Balay } 12310ac07820SSatish Balay PetscFree(rvals); 12320ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12330ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12340ac07820SSatish Balay 12350ac07820SSatish Balay if (matout != PETSC_NULL) { 12360ac07820SSatish Balay *matout = B; 12370ac07820SSatish Balay } else { 12380ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 12390ac07820SSatish Balay PetscFree(baij->rowners); 12400ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 12410ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 12420ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 12430ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 12440ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 12450ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 12460ac07820SSatish Balay PetscFree(baij); 1247f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 12480ac07820SSatish Balay PetscHeaderDestroy(B); 12490ac07820SSatish Balay } 12500ac07820SSatish Balay return 0; 12510ac07820SSatish Balay } 12520e95ebc0SSatish Balay 12535615d1e5SSatish Balay #undef __FUNC__ 12545615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 12550e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 12560e95ebc0SSatish Balay { 12570e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 12580e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 12590e95ebc0SSatish Balay int ierr,s1,s2,s3; 12600e95ebc0SSatish Balay 12610e95ebc0SSatish Balay if (ll) { 12620e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 12630e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1264e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 12650e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 12660e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 12670e95ebc0SSatish Balay } 1268e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 12690e95ebc0SSatish Balay return 0; 12700e95ebc0SSatish Balay } 12710e95ebc0SSatish Balay 12720ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 12730ac07820SSatish Balay matrix is square and the column and row owerships are identical. 12740ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 12750ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 12760ac07820SSatish Balay routine. 12770ac07820SSatish Balay */ 12785615d1e5SSatish Balay #undef __FUNC__ 12795615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1280ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 12810ac07820SSatish Balay { 12820ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 12830ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 12840ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 12850ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 12860ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 12870ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 12880ac07820SSatish Balay MPI_Comm comm = A->comm; 12890ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 12900ac07820SSatish Balay MPI_Status recv_status,*send_status; 12910ac07820SSatish Balay IS istmp; 12920ac07820SSatish Balay 12930ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 12940ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 12950ac07820SSatish Balay 12960ac07820SSatish Balay /* first count number of contributors to each processor */ 12970ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 12980ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 12990ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 13000ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13010ac07820SSatish Balay idx = rows[i]; 13020ac07820SSatish Balay found = 0; 13030ac07820SSatish Balay for ( j=0; j<size; j++ ) { 13040ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 13050ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 13060ac07820SSatish Balay } 13070ac07820SSatish Balay } 1308e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 13090ac07820SSatish Balay } 13100ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 13110ac07820SSatish Balay 13120ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 13130ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 13140ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 13150ac07820SSatish Balay nrecvs = work[rank]; 13160ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 13170ac07820SSatish Balay nmax = work[rank]; 13180ac07820SSatish Balay PetscFree(work); 13190ac07820SSatish Balay 13200ac07820SSatish Balay /* post receives: */ 13210ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 13220ac07820SSatish Balay CHKPTRQ(rvalues); 13230ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 13240ac07820SSatish Balay CHKPTRQ(recv_waits); 13250ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13260ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 13270ac07820SSatish Balay } 13280ac07820SSatish Balay 13290ac07820SSatish Balay /* do sends: 13300ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 13310ac07820SSatish Balay the ith processor 13320ac07820SSatish Balay */ 13330ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 13340ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 13350ac07820SSatish Balay CHKPTRQ(send_waits); 13360ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 13370ac07820SSatish Balay starts[0] = 0; 13380ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13390ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13400ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 13410ac07820SSatish Balay } 13420ac07820SSatish Balay ISRestoreIndices(is,&rows); 13430ac07820SSatish Balay 13440ac07820SSatish Balay starts[0] = 0; 13450ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13460ac07820SSatish Balay count = 0; 13470ac07820SSatish Balay for ( i=0; i<size; i++ ) { 13480ac07820SSatish Balay if (procs[i]) { 13490ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 13500ac07820SSatish Balay } 13510ac07820SSatish Balay } 13520ac07820SSatish Balay PetscFree(starts); 13530ac07820SSatish Balay 13540ac07820SSatish Balay base = owners[rank]*bs; 13550ac07820SSatish Balay 13560ac07820SSatish Balay /* wait on receives */ 13570ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 13580ac07820SSatish Balay source = lens + nrecvs; 13590ac07820SSatish Balay count = nrecvs; slen = 0; 13600ac07820SSatish Balay while (count) { 13610ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 13620ac07820SSatish Balay /* unpack receives into our local space */ 13630ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 13640ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 13650ac07820SSatish Balay lens[imdex] = n; 13660ac07820SSatish Balay slen += n; 13670ac07820SSatish Balay count--; 13680ac07820SSatish Balay } 13690ac07820SSatish Balay PetscFree(recv_waits); 13700ac07820SSatish Balay 13710ac07820SSatish Balay /* move the data into the send scatter */ 13720ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 13730ac07820SSatish Balay count = 0; 13740ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13750ac07820SSatish Balay values = rvalues + i*nmax; 13760ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 13770ac07820SSatish Balay lrows[count++] = values[j] - base; 13780ac07820SSatish Balay } 13790ac07820SSatish Balay } 13800ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 13810ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 13820ac07820SSatish Balay 13830ac07820SSatish Balay /* actually zap the local rows */ 1384029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 13850ac07820SSatish Balay PLogObjectParent(A,istmp); 13860ac07820SSatish Balay PetscFree(lrows); 13870ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 13880ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 13890ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 13900ac07820SSatish Balay 13910ac07820SSatish Balay /* wait on sends */ 13920ac07820SSatish Balay if (nsends) { 13930ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 13940ac07820SSatish Balay CHKPTRQ(send_status); 13950ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 13960ac07820SSatish Balay PetscFree(send_status); 13970ac07820SSatish Balay } 13980ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 13990ac07820SSatish Balay 14000ac07820SSatish Balay return 0; 14010ac07820SSatish Balay } 1402ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 14035615d1e5SSatish Balay #undef __FUNC__ 14045615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1405ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1406ba4ca20aSSatish Balay { 1407ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1408ba4ca20aSSatish Balay 1409ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1410ba4ca20aSSatish Balay else return 0; 1411ba4ca20aSSatish Balay } 14120ac07820SSatish Balay 14135615d1e5SSatish Balay #undef __FUNC__ 14145615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1415ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1416bb5a7306SBarry Smith { 1417bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1418bb5a7306SBarry Smith int ierr; 1419bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1420bb5a7306SBarry Smith return 0; 1421bb5a7306SBarry Smith } 1422bb5a7306SBarry Smith 14230ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 14240ac07820SSatish Balay 142579bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 142679bdfe76SSatish Balay static struct _MatOps MatOps = { 1427bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 14284c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 14294c50302cSBarry Smith 0,0,0,0, 14300ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 14310e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 143258667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 14334c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 14344c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 14354c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 143694a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1437d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1438ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1439bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1440ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 144179bdfe76SSatish Balay 144279bdfe76SSatish Balay 14435615d1e5SSatish Balay #undef __FUNC__ 14445615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 144579bdfe76SSatish Balay /*@C 144679bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 144779bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 144879bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 144979bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 145079bdfe76SSatish Balay performance can be increased by more than a factor of 50. 145179bdfe76SSatish Balay 145279bdfe76SSatish Balay Input Parameters: 145379bdfe76SSatish Balay . comm - MPI communicator 145479bdfe76SSatish Balay . bs - size of blockk 145579bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 145692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 145792e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 145892e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 145992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 146092e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 146179bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 146292e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 146379bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 146479bdfe76SSatish Balay submatrix (same for all local rows) 146592e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 146692e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 146792e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 146892e8d321SLois Curfman McInnes it is zero. 146992e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 147079bdfe76SSatish Balay submatrix (same for all local rows). 147192e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 147292e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 147392e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 147479bdfe76SSatish Balay 147579bdfe76SSatish Balay Output Parameter: 147679bdfe76SSatish Balay . A - the matrix 147779bdfe76SSatish Balay 147879bdfe76SSatish Balay Notes: 147979bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 148079bdfe76SSatish Balay (possibly both). 148179bdfe76SSatish Balay 148279bdfe76SSatish Balay Storage Information: 148379bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 148479bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 148579bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 148679bdfe76SSatish Balay local matrix (a rectangular submatrix). 148779bdfe76SSatish Balay 148879bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 148979bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 149079bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 149179bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 149279bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 149379bdfe76SSatish Balay 149479bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 149579bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 149679bdfe76SSatish Balay 149779bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 149879bdfe76SSatish Balay $ ------------------- 149979bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 150079bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 150179bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 150279bdfe76SSatish Balay $ ------------------- 150379bdfe76SSatish Balay $ 150479bdfe76SSatish Balay 150579bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 150679bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 150779bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 150857b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 150979bdfe76SSatish Balay 151079bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 151179bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 151279bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 151392e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 151492e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15156da5968aSLois Curfman McInnes matrices. 151679bdfe76SSatish Balay 151792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 151879bdfe76SSatish Balay 151979bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 152079bdfe76SSatish Balay @*/ 152179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 152279bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 152379bdfe76SSatish Balay { 152479bdfe76SSatish Balay Mat B; 152579bdfe76SSatish Balay Mat_MPIBAIJ *b; 15264c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 152779bdfe76SSatish Balay 1528e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 152979bdfe76SSatish Balay *A = 0; 1530f09e8eb9SSatish Balay PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 153179bdfe76SSatish Balay PLogObjectCreate(B); 153279bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 153379bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 153479bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 15354c50302cSBarry Smith MPI_Comm_size(comm,&size); 15364c50302cSBarry Smith if (size == 1) { 15374c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 15384c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 15394c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 15404c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 15414c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 15424c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 15434c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 15444c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 15454c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 15464c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 15474c50302cSBarry Smith } 15484c50302cSBarry Smith 154979bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 155079bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 155190f02eecSBarry Smith B->mapping = 0; 155279bdfe76SSatish Balay B->factor = 0; 155379bdfe76SSatish Balay B->assembled = PETSC_FALSE; 155479bdfe76SSatish Balay 1555e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 155679bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 155779bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 155879bdfe76SSatish Balay 155979bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1560e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1561e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1562e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1563cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1564cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 156579bdfe76SSatish Balay 156679bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 156779bdfe76SSatish Balay work[0] = m; work[1] = n; 156879bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 156979bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 157079bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 157179bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 157279bdfe76SSatish Balay } 157379bdfe76SSatish Balay if (m == PETSC_DECIDE) { 157479bdfe76SSatish Balay Mbs = M/bs; 1575e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 157679bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 157779bdfe76SSatish Balay m = mbs*bs; 157879bdfe76SSatish Balay } 157979bdfe76SSatish Balay if (n == PETSC_DECIDE) { 158079bdfe76SSatish Balay Nbs = N/bs; 1581e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 158279bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 158379bdfe76SSatish Balay n = nbs*bs; 158479bdfe76SSatish Balay } 1585e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 158679bdfe76SSatish Balay 158779bdfe76SSatish Balay b->m = m; B->m = m; 158879bdfe76SSatish Balay b->n = n; B->n = n; 158979bdfe76SSatish Balay b->N = N; B->N = N; 159079bdfe76SSatish Balay b->M = M; B->M = M; 159179bdfe76SSatish Balay b->bs = bs; 159279bdfe76SSatish Balay b->bs2 = bs*bs; 159379bdfe76SSatish Balay b->mbs = mbs; 159479bdfe76SSatish Balay b->nbs = nbs; 159579bdfe76SSatish Balay b->Mbs = Mbs; 159679bdfe76SSatish Balay b->Nbs = Nbs; 159779bdfe76SSatish Balay 159879bdfe76SSatish Balay /* build local table of row and column ownerships */ 159979bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1600f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 16010ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 160279bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 160379bdfe76SSatish Balay b->rowners[0] = 0; 160479bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 160579bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 160679bdfe76SSatish Balay } 160779bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 160879bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 16094fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 16104fa0d573SSatish Balay b->rend_bs = b->rend * bs; 16114fa0d573SSatish Balay 161279bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 161379bdfe76SSatish Balay b->cowners[0] = 0; 161479bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 161579bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 161679bdfe76SSatish Balay } 161779bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 161879bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 16194fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 16204fa0d573SSatish Balay b->cend_bs = b->cend * bs; 162179bdfe76SSatish Balay 162279bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1623029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 162479bdfe76SSatish Balay PLogObjectParent(B,b->A); 162579bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1626029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 162779bdfe76SSatish Balay PLogObjectParent(B,b->B); 162879bdfe76SSatish Balay 162979bdfe76SSatish Balay /* build cache for off array entries formed */ 163079bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 163190f02eecSBarry Smith b->donotstash = 0; 163279bdfe76SSatish Balay b->colmap = 0; 163379bdfe76SSatish Balay b->garray = 0; 163479bdfe76SSatish Balay b->roworiented = 1; 163579bdfe76SSatish Balay 163630793edcSSatish Balay /* stuff used in block assembly */ 163730793edcSSatish Balay b->barray = 0; 163830793edcSSatish Balay 163979bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 164079bdfe76SSatish Balay b->lvec = 0; 164179bdfe76SSatish Balay b->Mvctx = 0; 164279bdfe76SSatish Balay 164379bdfe76SSatish Balay /* stuff for MatGetRow() */ 164479bdfe76SSatish Balay b->rowindices = 0; 164579bdfe76SSatish Balay b->rowvalues = 0; 164679bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 164779bdfe76SSatish Balay 164879bdfe76SSatish Balay *A = B; 164979bdfe76SSatish Balay return 0; 165079bdfe76SSatish Balay } 1651026e39d0SSatish Balay 16525615d1e5SSatish Balay #undef __FUNC__ 16535615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 16540ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 16550ac07820SSatish Balay { 16560ac07820SSatish Balay Mat mat; 16570ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 16580ac07820SSatish Balay int ierr, len=0, flg; 16590ac07820SSatish Balay 16600ac07820SSatish Balay *newmat = 0; 1661f09e8eb9SSatish Balay PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 16620ac07820SSatish Balay PLogObjectCreate(mat); 16630ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 16640ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 16650ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 16660ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 16670ac07820SSatish Balay mat->factor = matin->factor; 16680ac07820SSatish Balay mat->assembled = PETSC_TRUE; 16690ac07820SSatish Balay 16700ac07820SSatish Balay a->m = mat->m = oldmat->m; 16710ac07820SSatish Balay a->n = mat->n = oldmat->n; 16720ac07820SSatish Balay a->M = mat->M = oldmat->M; 16730ac07820SSatish Balay a->N = mat->N = oldmat->N; 16740ac07820SSatish Balay 16750ac07820SSatish Balay a->bs = oldmat->bs; 16760ac07820SSatish Balay a->bs2 = oldmat->bs2; 16770ac07820SSatish Balay a->mbs = oldmat->mbs; 16780ac07820SSatish Balay a->nbs = oldmat->nbs; 16790ac07820SSatish Balay a->Mbs = oldmat->Mbs; 16800ac07820SSatish Balay a->Nbs = oldmat->Nbs; 16810ac07820SSatish Balay 16820ac07820SSatish Balay a->rstart = oldmat->rstart; 16830ac07820SSatish Balay a->rend = oldmat->rend; 16840ac07820SSatish Balay a->cstart = oldmat->cstart; 16850ac07820SSatish Balay a->cend = oldmat->cend; 16860ac07820SSatish Balay a->size = oldmat->size; 16870ac07820SSatish Balay a->rank = oldmat->rank; 1688e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 16890ac07820SSatish Balay a->rowvalues = 0; 16900ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 169130793edcSSatish Balay a->barray = 0; 16920ac07820SSatish Balay 16930ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1694f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 16950ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 16960ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 16970ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16980ac07820SSatish Balay if (oldmat->colmap) { 16990ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 17000ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 17010ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 17020ac07820SSatish Balay } else a->colmap = 0; 17030ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 17040ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 17050ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 17060ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 17070ac07820SSatish Balay } else a->garray = 0; 17080ac07820SSatish Balay 17090ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 17100ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 17110ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 17120ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 17130ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 17140ac07820SSatish Balay PLogObjectParent(mat,a->A); 17150ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 17160ac07820SSatish Balay PLogObjectParent(mat,a->B); 17170ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 17180ac07820SSatish Balay if (flg) { 17190ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 17200ac07820SSatish Balay } 17210ac07820SSatish Balay *newmat = mat; 17220ac07820SSatish Balay return 0; 17230ac07820SSatish Balay } 172457b952d6SSatish Balay 172557b952d6SSatish Balay #include "sys.h" 172657b952d6SSatish Balay 17275615d1e5SSatish Balay #undef __FUNC__ 17285615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 172957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 173057b952d6SSatish Balay { 173157b952d6SSatish Balay Mat A; 173257b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 173357b952d6SSatish Balay Scalar *vals,*buf; 173457b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 173557b952d6SSatish Balay MPI_Status status; 1736cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 173757b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 173857b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 173957b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 174057b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 174157b952d6SSatish Balay 174257b952d6SSatish Balay 174357b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 174457b952d6SSatish Balay bs2 = bs*bs; 174557b952d6SSatish Balay 174657b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 174757b952d6SSatish Balay if (!rank) { 174857b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 174957b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1750e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 175157b952d6SSatish Balay } 175257b952d6SSatish Balay 175357b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 175457b952d6SSatish Balay M = header[1]; N = header[2]; 175557b952d6SSatish Balay 1756e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 175757b952d6SSatish Balay 175857b952d6SSatish Balay /* 175957b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 176057b952d6SSatish Balay divisible by the blocksize 176157b952d6SSatish Balay */ 176257b952d6SSatish Balay Mbs = M/bs; 176357b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 176457b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 176557b952d6SSatish Balay else Mbs++; 176657b952d6SSatish Balay if (extra_rows &&!rank) { 1767b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 176857b952d6SSatish Balay } 1769537820f0SBarry Smith 177057b952d6SSatish Balay /* determine ownership of all rows */ 177157b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 177257b952d6SSatish Balay m = mbs * bs; 1773cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1774cee3aa6bSSatish Balay browners = rowners + size + 1; 177557b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 177657b952d6SSatish Balay rowners[0] = 0; 1777cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1778cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 177957b952d6SSatish Balay rstart = rowners[rank]; 178057b952d6SSatish Balay rend = rowners[rank+1]; 178157b952d6SSatish Balay 178257b952d6SSatish Balay /* distribute row lengths to all processors */ 178357b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 178457b952d6SSatish Balay if (!rank) { 178557b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 178657b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 178757b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 178857b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1789cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1790cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 179157b952d6SSatish Balay PetscFree(sndcounts); 179257b952d6SSatish Balay } 179357b952d6SSatish Balay else { 179457b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 179557b952d6SSatish Balay } 179657b952d6SSatish Balay 179757b952d6SSatish Balay if (!rank) { 179857b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 179957b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 180057b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 180157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 180257b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 180357b952d6SSatish Balay procsnz[i] += rowlengths[j]; 180457b952d6SSatish Balay } 180557b952d6SSatish Balay } 180657b952d6SSatish Balay PetscFree(rowlengths); 180757b952d6SSatish Balay 180857b952d6SSatish Balay /* determine max buffer needed and allocate it */ 180957b952d6SSatish Balay maxnz = 0; 181057b952d6SSatish Balay for ( i=0; i<size; i++ ) { 181157b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 181257b952d6SSatish Balay } 181357b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 181457b952d6SSatish Balay 181557b952d6SSatish Balay /* read in my part of the matrix column indices */ 181657b952d6SSatish Balay nz = procsnz[0]; 181757b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 181857b952d6SSatish Balay mycols = ibuf; 1819cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 182057b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1821cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1822cee3aa6bSSatish Balay 182357b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 182457b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 182557b952d6SSatish Balay nz = procsnz[i]; 182657b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 182757b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 182857b952d6SSatish Balay } 182957b952d6SSatish Balay /* read in the stuff for the last proc */ 183057b952d6SSatish Balay if ( size != 1 ) { 183157b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 183257b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 183357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1834cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 183557b952d6SSatish Balay } 183657b952d6SSatish Balay PetscFree(cols); 183757b952d6SSatish Balay } 183857b952d6SSatish Balay else { 183957b952d6SSatish Balay /* determine buffer space needed for message */ 184057b952d6SSatish Balay nz = 0; 184157b952d6SSatish Balay for ( i=0; i<m; i++ ) { 184257b952d6SSatish Balay nz += locrowlens[i]; 184357b952d6SSatish Balay } 184457b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 184557b952d6SSatish Balay mycols = ibuf; 184657b952d6SSatish Balay /* receive message of column indices*/ 184757b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 184857b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1849e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 185057b952d6SSatish Balay } 185157b952d6SSatish Balay 185257b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1853cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1854cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 185557b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1856cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 185757b952d6SSatish Balay masked1 = mask + Mbs; 185857b952d6SSatish Balay masked2 = masked1 + Mbs; 185957b952d6SSatish Balay rowcount = 0; nzcount = 0; 186057b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 186157b952d6SSatish Balay dcount = 0; 186257b952d6SSatish Balay odcount = 0; 186357b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 186457b952d6SSatish Balay kmax = locrowlens[rowcount]; 186557b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 186657b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 186757b952d6SSatish Balay if (!mask[tmp]) { 186857b952d6SSatish Balay mask[tmp] = 1; 186957b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 187057b952d6SSatish Balay else masked1[dcount++] = tmp; 187157b952d6SSatish Balay } 187257b952d6SSatish Balay } 187357b952d6SSatish Balay rowcount++; 187457b952d6SSatish Balay } 1875cee3aa6bSSatish Balay 187657b952d6SSatish Balay dlens[i] = dcount; 187757b952d6SSatish Balay odlens[i] = odcount; 1878cee3aa6bSSatish Balay 187957b952d6SSatish Balay /* zero out the mask elements we set */ 188057b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 188157b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 188257b952d6SSatish Balay } 1883cee3aa6bSSatish Balay 188457b952d6SSatish Balay /* create our matrix */ 1885537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1886537820f0SBarry Smith CHKERRQ(ierr); 188757b952d6SSatish Balay A = *newmat; 18886d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 188957b952d6SSatish Balay 189057b952d6SSatish Balay if (!rank) { 189157b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 189257b952d6SSatish Balay /* read in my part of the matrix numerical values */ 189357b952d6SSatish Balay nz = procsnz[0]; 189457b952d6SSatish Balay vals = buf; 1895cee3aa6bSSatish Balay mycols = ibuf; 1896cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 189757b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1898cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1899537820f0SBarry Smith 190057b952d6SSatish Balay /* insert into matrix */ 190157b952d6SSatish Balay jj = rstart*bs; 190257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 190357b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 190457b952d6SSatish Balay mycols += locrowlens[i]; 190557b952d6SSatish Balay vals += locrowlens[i]; 190657b952d6SSatish Balay jj++; 190757b952d6SSatish Balay } 190857b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 190957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 191057b952d6SSatish Balay nz = procsnz[i]; 191157b952d6SSatish Balay vals = buf; 191257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 191357b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 191457b952d6SSatish Balay } 191557b952d6SSatish Balay /* the last proc */ 191657b952d6SSatish Balay if ( size != 1 ){ 191757b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1918cee3aa6bSSatish Balay vals = buf; 191957b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 192057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1921cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 192257b952d6SSatish Balay } 192357b952d6SSatish Balay PetscFree(procsnz); 192457b952d6SSatish Balay } 192557b952d6SSatish Balay else { 192657b952d6SSatish Balay /* receive numeric values */ 192757b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 192857b952d6SSatish Balay 192957b952d6SSatish Balay /* receive message of values*/ 193057b952d6SSatish Balay vals = buf; 1931cee3aa6bSSatish Balay mycols = ibuf; 193257b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 193357b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1934e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 193557b952d6SSatish Balay 193657b952d6SSatish Balay /* insert into matrix */ 193757b952d6SSatish Balay jj = rstart*bs; 1938cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 193957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 194057b952d6SSatish Balay mycols += locrowlens[i]; 194157b952d6SSatish Balay vals += locrowlens[i]; 194257b952d6SSatish Balay jj++; 194357b952d6SSatish Balay } 194457b952d6SSatish Balay } 194557b952d6SSatish Balay PetscFree(locrowlens); 194657b952d6SSatish Balay PetscFree(buf); 194757b952d6SSatish Balay PetscFree(ibuf); 194857b952d6SSatish Balay PetscFree(rowners); 194957b952d6SSatish Balay PetscFree(dlens); 1950cee3aa6bSSatish Balay PetscFree(mask); 19516d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19526d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 195357b952d6SSatish Balay return 0; 195457b952d6SSatish Balay } 195557b952d6SSatish Balay 195657b952d6SSatish Balay 1957