179bdfe76SSatish Balay #ifndef lint 2*f09e8eb9SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.70 1997/05/09 22:32:36 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2257b952d6SSatish Balay 233b2fbd54SBarry Smith 24537820f0SBarry Smith /* 25537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2857b952d6SSatish Balay length of colmap equals the global matrix length. 2957b952d6SSatish Balay */ 305615d1e5SSatish Balay #undef __FUNC__ 315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3357b952d6SSatish Balay { 3457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3557b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 36928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3757b952d6SSatish Balay 38758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3957b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4057b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4257b952d6SSatish Balay return 0; 4357b952d6SSatish Balay } 4457b952d6SSatish Balay 455615d1e5SSatish Balay #undef __FUNC__ 465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 4957b952d6SSatish Balay { 503b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5157b952d6SSatish Balay int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 553b2fbd54SBarry Smith return 0; 563b2fbd54SBarry Smith } 573b2fbd54SBarry Smith 585615d1e5SSatish Balay #undef __FUNC__ 595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 613b2fbd54SBarry Smith PetscTruth *done) 623b2fbd54SBarry Smith { 633b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 643b2fbd54SBarry Smith int ierr; 653b2fbd54SBarry Smith if (aij->size == 1) { 663b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 67e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6857b952d6SSatish Balay return 0; 6957b952d6SSatish Balay } 7080c1aa95SSatish Balay #define CHUNKSIZE 10 7180c1aa95SSatish Balay 72f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 7380c1aa95SSatish Balay { \ 7480c1aa95SSatish Balay \ 7580c1aa95SSatish Balay brow = row/bs; \ 7680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 77ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 7880c1aa95SSatish Balay bcol = col/bs; \ 7980c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 80ab26458aSBarry Smith low = 0; high = nrow; \ 81ab26458aSBarry Smith while (high-low > 3) { \ 82ab26458aSBarry Smith t = (low+high)/2; \ 83ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 84ab26458aSBarry Smith else low = t; \ 85ab26458aSBarry Smith } \ 86ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 8780c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 8880c1aa95SSatish Balay if (rp[_i] == bcol) { \ 8980c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 90eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 91eada6651SSatish Balay else *bap = value; \ 92ac7a638eSSatish Balay goto a_noinsert; \ 9380c1aa95SSatish Balay } \ 9480c1aa95SSatish Balay } \ 9589280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 9611ebbc71SLois Curfman McInnes else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 9780c1aa95SSatish Balay if (nrow >= rmax) { \ 9880c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 9980c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 10080c1aa95SSatish Balay Scalar *new_a; \ 10180c1aa95SSatish Balay \ 10211ebbc71SLois Curfman McInnes if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 10389280ab3SLois Curfman McInnes \ 10480c1aa95SSatish Balay /* malloc new storage space */ \ 10580c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 10680c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 10780c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 10880c1aa95SSatish Balay new_i = new_j + new_nz; \ 10980c1aa95SSatish Balay \ 11080c1aa95SSatish Balay /* copy over old data into new slots */ \ 11180c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 11280c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 11380c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 11480c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 11580c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 11680c1aa95SSatish Balay len*sizeof(int)); \ 11780c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 11880c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 11980c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 12080c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 12180c1aa95SSatish Balay /* free up old matrix storage */ \ 12280c1aa95SSatish Balay PetscFree(a->a); \ 12380c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 12480c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 12580c1aa95SSatish Balay a->singlemalloc = 1; \ 12680c1aa95SSatish Balay \ 12780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 128ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 12980c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 13080c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 13180c1aa95SSatish Balay a->reallocs++; \ 13280c1aa95SSatish Balay a->nz++; \ 13380c1aa95SSatish Balay } \ 13480c1aa95SSatish Balay N = nrow++ - 1; \ 13580c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 13680c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 13780c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 13880c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 13980c1aa95SSatish Balay } \ 14080c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 14180c1aa95SSatish Balay rp[_i] = bcol; \ 14280c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 143ac7a638eSSatish Balay a_noinsert:; \ 14480c1aa95SSatish Balay ailen[brow] = nrow; \ 14580c1aa95SSatish Balay } 14657b952d6SSatish Balay 147ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 148ac7a638eSSatish Balay { \ 149ac7a638eSSatish Balay \ 150ac7a638eSSatish Balay brow = row/bs; \ 151ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 153ac7a638eSSatish Balay bcol = col/bs; \ 154ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 155ac7a638eSSatish Balay low = 0; high = nrow; \ 156ac7a638eSSatish Balay while (high-low > 3) { \ 157ac7a638eSSatish Balay t = (low+high)/2; \ 158ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 159ac7a638eSSatish Balay else low = t; \ 160ac7a638eSSatish Balay } \ 161ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 162ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 163ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 164ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 165ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 166ac7a638eSSatish Balay else *bap = value; \ 167ac7a638eSSatish Balay goto b_noinsert; \ 168ac7a638eSSatish Balay } \ 169ac7a638eSSatish Balay } \ 17089280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 17111ebbc71SLois Curfman McInnes else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 172ac7a638eSSatish Balay if (nrow >= rmax) { \ 173ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 17474c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 175ac7a638eSSatish Balay Scalar *new_a; \ 176ac7a638eSSatish Balay \ 17711ebbc71SLois Curfman McInnes if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 17889280ab3SLois Curfman McInnes \ 179ac7a638eSSatish Balay /* malloc new storage space */ \ 18074c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 181ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 182ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 183ac7a638eSSatish Balay new_i = new_j + new_nz; \ 184ac7a638eSSatish Balay \ 185ac7a638eSSatish Balay /* copy over old data into new slots */ \ 186ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 18774c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 188ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 189ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 190ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 191ac7a638eSSatish Balay len*sizeof(int)); \ 192ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 193ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 194ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 195ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 196ac7a638eSSatish Balay /* free up old matrix storage */ \ 19774c639caSSatish Balay PetscFree(b->a); \ 19874c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 19974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 20074c639caSSatish Balay b->singlemalloc = 1; \ 201ac7a638eSSatish Balay \ 202ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 203ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 20474c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 20574c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 20674c639caSSatish Balay b->reallocs++; \ 20774c639caSSatish Balay b->nz++; \ 208ac7a638eSSatish Balay } \ 209ac7a638eSSatish Balay N = nrow++ - 1; \ 210ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 211ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 212ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 213ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 214ac7a638eSSatish Balay } \ 215ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 216ac7a638eSSatish Balay rp[_i] = bcol; \ 217ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 218ac7a638eSSatish Balay b_noinsert:; \ 219ac7a638eSSatish Balay bilen[brow] = nrow; \ 220ac7a638eSSatish Balay } 221ac7a638eSSatish Balay 2225615d1e5SSatish Balay #undef __FUNC__ 2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 22557b952d6SSatish Balay { 22657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22757b952d6SSatish Balay Scalar value; 2284fa0d573SSatish Balay int ierr,i,j,row,col; 2294fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2304fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2314fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 23257b952d6SSatish Balay 233eada6651SSatish Balay /* Some Variables required in the macro */ 23480c1aa95SSatish Balay Mat A = baij->A; 23580c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 236ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 237ac7a638eSSatish Balay Scalar *aa=a->a; 238ac7a638eSSatish Balay 239ac7a638eSSatish Balay Mat B = baij->B; 240ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 241ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 242ac7a638eSSatish Balay Scalar *ba=b->a; 243ac7a638eSSatish Balay 244ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 245ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 246ac7a638eSSatish Balay Scalar *ap,*bap; 24780c1aa95SSatish Balay 24857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 250e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 251e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 252639f9d9dSBarry Smith #endif 25357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 25457b952d6SSatish Balay row = im[i] - rstart_orig; 25557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 25657b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 25757b952d6SSatish Balay col = in[j] - cstart_orig; 25857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 26080c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 26157b952d6SSatish Balay } 262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 263e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 264e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 265639f9d9dSBarry Smith #endif 26657b952d6SSatish Balay else { 26757b952d6SSatish Balay if (mat->was_assembled) { 268905e6a2fSBarry Smith if (!baij->colmap) { 269905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 270905e6a2fSBarry Smith } 271905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 27257b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 27357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 27457b952d6SSatish Balay col = in[j]; 2759bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2769bf004c3SSatish Balay B = baij->B; 2779bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2789bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2799bf004c3SSatish Balay ba=b->a; 28057b952d6SSatish Balay } 28157b952d6SSatish Balay } 28257b952d6SSatish Balay else col = in[j]; 28357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 284ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 285ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 28657b952d6SSatish Balay } 28757b952d6SSatish Balay } 28857b952d6SSatish Balay } 28957b952d6SSatish Balay else { 29090f02eecSBarry Smith if (roworiented && !baij->donotstash) { 29157b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 29257b952d6SSatish Balay } 29357b952d6SSatish Balay else { 29490f02eecSBarry Smith if (!baij->donotstash) { 29557b952d6SSatish Balay row = im[i]; 29657b952d6SSatish Balay for ( j=0; j<n; j++ ) { 29757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 29857b952d6SSatish Balay } 29957b952d6SSatish Balay } 30057b952d6SSatish Balay } 30157b952d6SSatish Balay } 30290f02eecSBarry Smith } 30357b952d6SSatish Balay return 0; 30457b952d6SSatish Balay } 30557b952d6SSatish Balay 306ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 307ab26458aSBarry Smith #undef __FUNC__ 308ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 309ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 310ab26458aSBarry Smith { 311ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 31230793edcSSatish Balay Scalar *value,*barray=baij->barray; 313abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 314ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 315ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 316ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 317ab26458aSBarry Smith 31830793edcSSatish Balay 31930793edcSSatish Balay if(!barray) { 32030793edcSSatish Balay barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 32130793edcSSatish Balay baij->barray = barray; 32230793edcSSatish Balay } 32330793edcSSatish Balay 324ab26458aSBarry Smith if (roworiented) { 325ab26458aSBarry Smith stepval = (n-1)*bs; 326ab26458aSBarry Smith } else { 327ab26458aSBarry Smith stepval = (m-1)*bs; 328ab26458aSBarry Smith } 329ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 330ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 331ab26458aSBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 332ab26458aSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 333ab26458aSBarry Smith #endif 334ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 335ab26458aSBarry Smith row = im[i] - rstart; 336ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 33715b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 33815b57d14SSatish Balay if ((roworiented) && (n == 1)) { 33915b57d14SSatish Balay barray = v + i*bs2; 34015b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 34115b57d14SSatish Balay barray = v + j*bs2; 34215b57d14SSatish Balay } else { /* Here a copy is required */ 343ab26458aSBarry Smith if (roworiented) { 344ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 345ab26458aSBarry Smith } else { 346ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 347abef11f7SSatish Balay } 348ab26458aSBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) 349ab26458aSBarry Smith for (jj=0; jj<bs; jj++ ) 35030793edcSSatish Balay *barray++ = *value++; 35130793edcSSatish Balay barray -=bs2; 35215b57d14SSatish Balay } 353abef11f7SSatish Balay 354abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 355abef11f7SSatish Balay col = in[j] - cstart; 35630793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 357ab26458aSBarry Smith } 358ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 359ab26458aSBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 360ab26458aSBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");} 361ab26458aSBarry Smith #endif 362ab26458aSBarry Smith else { 363ab26458aSBarry Smith if (mat->was_assembled) { 364ab26458aSBarry Smith if (!baij->colmap) { 365ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 366ab26458aSBarry Smith } 367ab26458aSBarry Smith col = baij->colmap[in[j]] - 1; 368ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 369ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 370ab26458aSBarry Smith col = in[j]; 371ab26458aSBarry Smith } 372ab26458aSBarry Smith } 373ab26458aSBarry Smith else col = in[j]; 37430793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 375ab26458aSBarry Smith } 376ab26458aSBarry Smith } 377ab26458aSBarry Smith } 378ab26458aSBarry Smith else { 379ab26458aSBarry Smith if (!baij->donotstash) { 380ab26458aSBarry Smith if (roworiented ) { 381abef11f7SSatish Balay row = im[i]*bs; 382abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 383abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 384abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 385abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 386abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 387abef11f7SSatish Balay } 388ab26458aSBarry Smith } 389ab26458aSBarry Smith } 390ab26458aSBarry Smith } 391ab26458aSBarry Smith else { 392ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 393abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 394abef11f7SSatish Balay col = in[j]*bs; 395abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 396abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 397abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 398ab26458aSBarry Smith } 399ab26458aSBarry Smith } 400ab26458aSBarry Smith } 401abef11f7SSatish Balay 402abef11f7SSatish Balay } 403abef11f7SSatish Balay } 404ab26458aSBarry Smith } 405ab26458aSBarry Smith } 406ab26458aSBarry Smith return 0; 407ab26458aSBarry Smith } 408ab26458aSBarry Smith 4095615d1e5SSatish Balay #undef __FUNC__ 4105615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 411ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 412d6de1c52SSatish Balay { 413d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 414d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 415d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 416d6de1c52SSatish Balay 417d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 418e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 419e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 420d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 421d6de1c52SSatish Balay row = idxm[i] - bsrstart; 422d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 423e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 424e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 425d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 426d6de1c52SSatish Balay col = idxn[j] - bscstart; 427d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 428d6de1c52SSatish Balay } 429d6de1c52SSatish Balay else { 430905e6a2fSBarry Smith if (!baij->colmap) { 431905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 432905e6a2fSBarry Smith } 433e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 434dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 435d9d09a02SSatish Balay else { 436dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 437d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 438d6de1c52SSatish Balay } 439d6de1c52SSatish Balay } 440d6de1c52SSatish Balay } 441d9d09a02SSatish Balay } 442d6de1c52SSatish Balay else { 443e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 444d6de1c52SSatish Balay } 445d6de1c52SSatish Balay } 446d6de1c52SSatish Balay return 0; 447d6de1c52SSatish Balay } 448d6de1c52SSatish Balay 4495615d1e5SSatish Balay #undef __FUNC__ 4505615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 451ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 452d6de1c52SSatish Balay { 453d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 454d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 455acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 456d6de1c52SSatish Balay double sum = 0.0; 457d6de1c52SSatish Balay Scalar *v; 458d6de1c52SSatish Balay 459d6de1c52SSatish Balay if (baij->size == 1) { 460d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 461d6de1c52SSatish Balay } else { 462d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 463d6de1c52SSatish Balay v = amat->a; 464d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 465d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 466d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 467d6de1c52SSatish Balay #else 468d6de1c52SSatish Balay sum += (*v)*(*v); v++; 469d6de1c52SSatish Balay #endif 470d6de1c52SSatish Balay } 471d6de1c52SSatish Balay v = bmat->a; 472d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 473d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 474d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 475d6de1c52SSatish Balay #else 476d6de1c52SSatish Balay sum += (*v)*(*v); v++; 477d6de1c52SSatish Balay #endif 478d6de1c52SSatish Balay } 479d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 480d6de1c52SSatish Balay *norm = sqrt(*norm); 481d6de1c52SSatish Balay } 482acdf5bf4SSatish Balay else 483e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 484d6de1c52SSatish Balay } 485d6de1c52SSatish Balay return 0; 486d6de1c52SSatish Balay } 48757b952d6SSatish Balay 4885615d1e5SSatish Balay #undef __FUNC__ 4895615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 490ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 49157b952d6SSatish Balay { 49257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 49357b952d6SSatish Balay MPI_Comm comm = mat->comm; 49457b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 49557b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 49657b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 49757b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 49857b952d6SSatish Balay InsertMode addv; 49957b952d6SSatish Balay Scalar *rvalues,*svalues; 50057b952d6SSatish Balay 50157b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 502e0fa3b82SLois Curfman McInnes MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 50357b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 504e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 50557b952d6SSatish Balay } 506e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 50757b952d6SSatish Balay 50857b952d6SSatish Balay /* first count number of contributors to each processor */ 50957b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 51057b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 51157b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 51257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 51357b952d6SSatish Balay idx = baij->stash.idx[i]; 51457b952d6SSatish Balay for ( j=0; j<size; j++ ) { 51557b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 51657b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 51757b952d6SSatish Balay } 51857b952d6SSatish Balay } 51957b952d6SSatish Balay } 52057b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 52157b952d6SSatish Balay 52257b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 52357b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 52457b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 52557b952d6SSatish Balay nreceives = work[rank]; 52657b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 52757b952d6SSatish Balay nmax = work[rank]; 52857b952d6SSatish Balay PetscFree(work); 52957b952d6SSatish Balay 53057b952d6SSatish Balay /* post receives: 53157b952d6SSatish Balay 1) each message will consist of ordered pairs 53257b952d6SSatish Balay (global index,value) we store the global index as a double 53357b952d6SSatish Balay to simplify the message passing. 53457b952d6SSatish Balay 2) since we don't know how long each individual message is we 53557b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 53657b952d6SSatish Balay this is a lot of wasted space. 53757b952d6SSatish Balay 53857b952d6SSatish Balay 53957b952d6SSatish Balay This could be done better. 54057b952d6SSatish Balay */ 54157b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 54257b952d6SSatish Balay CHKPTRQ(rvalues); 54357b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 54457b952d6SSatish Balay CHKPTRQ(recv_waits); 54557b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 54657b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 54757b952d6SSatish Balay comm,recv_waits+i); 54857b952d6SSatish Balay } 54957b952d6SSatish Balay 55057b952d6SSatish Balay /* do sends: 55157b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 55257b952d6SSatish Balay the ith processor 55357b952d6SSatish Balay */ 55457b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 55557b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 55657b952d6SSatish Balay CHKPTRQ(send_waits); 55757b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 55857b952d6SSatish Balay starts[0] = 0; 55957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 56057b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 56157b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 56257b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 56357b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 56457b952d6SSatish Balay } 56557b952d6SSatish Balay PetscFree(owner); 56657b952d6SSatish Balay starts[0] = 0; 56757b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 56857b952d6SSatish Balay count = 0; 56957b952d6SSatish Balay for ( i=0; i<size; i++ ) { 57057b952d6SSatish Balay if (procs[i]) { 57157b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 57257b952d6SSatish Balay comm,send_waits+count++); 57357b952d6SSatish Balay } 57457b952d6SSatish Balay } 57557b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 57657b952d6SSatish Balay 57757b952d6SSatish Balay /* Free cache space */ 578d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 57957b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 58057b952d6SSatish Balay 58157b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 58257b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 58357b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 58457b952d6SSatish Balay baij->rmax = nmax; 58557b952d6SSatish Balay 58657b952d6SSatish Balay return 0; 58757b952d6SSatish Balay } 58857b952d6SSatish Balay 58957b952d6SSatish Balay 5905615d1e5SSatish Balay #undef __FUNC__ 5915615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 592ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 59357b952d6SSatish Balay { 59457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 59557b952d6SSatish Balay MPI_Status *send_status,recv_status; 59657b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 59757b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 59857b952d6SSatish Balay Scalar *values,val; 599e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 60057b952d6SSatish Balay 60157b952d6SSatish Balay /* wait on receives */ 60257b952d6SSatish Balay while (count) { 60357b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 60457b952d6SSatish Balay /* unpack receives into our local space */ 60557b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 60657b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 60757b952d6SSatish Balay n = n/3; 60857b952d6SSatish Balay for ( i=0; i<n; i++ ) { 60957b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 61057b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 61157b952d6SSatish Balay val = values[3*i+2]; 61257b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 61357b952d6SSatish Balay col -= baij->cstart*bs; 61457b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 61557b952d6SSatish Balay } 61657b952d6SSatish Balay else { 61757b952d6SSatish Balay if (mat->was_assembled) { 618905e6a2fSBarry Smith if (!baij->colmap) { 619905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 620905e6a2fSBarry Smith } 621905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 62257b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 62357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 62457b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 62557b952d6SSatish Balay } 62657b952d6SSatish Balay } 62757b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 62857b952d6SSatish Balay } 62957b952d6SSatish Balay } 63057b952d6SSatish Balay count--; 63157b952d6SSatish Balay } 63257b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 63357b952d6SSatish Balay 63457b952d6SSatish Balay /* wait on sends */ 63557b952d6SSatish Balay if (baij->nsends) { 63657b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 63757b952d6SSatish Balay CHKPTRQ(send_status); 63857b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 63957b952d6SSatish Balay PetscFree(send_status); 64057b952d6SSatish Balay } 64157b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 64257b952d6SSatish Balay 64357b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 64457b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 64557b952d6SSatish Balay 64657b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 64757b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 64857b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 64957b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 65057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 65157b952d6SSatish Balay } 65257b952d6SSatish Balay 6536d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 65457b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 65557b952d6SSatish Balay } 65657b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 65757b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 65857b952d6SSatish Balay 65957b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 66057b952d6SSatish Balay return 0; 66157b952d6SSatish Balay } 66257b952d6SSatish Balay 6635615d1e5SSatish Balay #undef __FUNC__ 6645615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 66557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 66657b952d6SSatish Balay { 66757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 66857b952d6SSatish Balay int ierr; 66957b952d6SSatish Balay 67057b952d6SSatish Balay if (baij->size == 1) { 67157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 67257b952d6SSatish Balay } 673e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 67457b952d6SSatish Balay return 0; 67557b952d6SSatish Balay } 67657b952d6SSatish Balay 6775615d1e5SSatish Balay #undef __FUNC__ 6785615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 67957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 68057b952d6SSatish Balay { 68157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 682cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 68357b952d6SSatish Balay FILE *fd; 68457b952d6SSatish Balay ViewerType vtype; 68557b952d6SSatish Balay 68657b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 68757b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 68857b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 689639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 6904e220ebcSLois Curfman McInnes MatInfo info; 69157b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 69257b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 6934e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 69457b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 69557b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 6964e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 6974e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 6984e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 6994e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 7004e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 7014e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 70257b952d6SSatish Balay fflush(fd); 70357b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 70457b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 70557b952d6SSatish Balay return 0; 70657b952d6SSatish Balay } 707639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 708bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 70957b952d6SSatish Balay return 0; 71057b952d6SSatish Balay } 71157b952d6SSatish Balay } 71257b952d6SSatish Balay 71357b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 71457b952d6SSatish Balay Draw draw; 71557b952d6SSatish Balay PetscTruth isnull; 71657b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 71757b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 71857b952d6SSatish Balay } 71957b952d6SSatish Balay 72057b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 72157b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 72257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 72357b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 72457b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 72557b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 72657b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 72757b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 72857b952d6SSatish Balay fflush(fd); 72957b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 73057b952d6SSatish Balay } 73157b952d6SSatish Balay else { 73257b952d6SSatish Balay int size = baij->size; 73357b952d6SSatish Balay rank = baij->rank; 73457b952d6SSatish Balay if (size == 1) { 73557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 73657b952d6SSatish Balay } 73757b952d6SSatish Balay else { 73857b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 73957b952d6SSatish Balay Mat A; 74057b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 74157b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 74257b952d6SSatish Balay int mbs=baij->mbs; 74357b952d6SSatish Balay Scalar *a; 74457b952d6SSatish Balay 74557b952d6SSatish Balay if (!rank) { 746cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 74757b952d6SSatish Balay CHKERRQ(ierr); 74857b952d6SSatish Balay } 74957b952d6SSatish Balay else { 750cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 75157b952d6SSatish Balay CHKERRQ(ierr); 75257b952d6SSatish Balay } 75357b952d6SSatish Balay PLogObjectParent(mat,A); 75457b952d6SSatish Balay 75557b952d6SSatish Balay /* copy over the A part */ 75657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 75757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 75857b952d6SSatish Balay row = baij->rstart; 75957b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 76057b952d6SSatish Balay 76157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 76257b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 76357b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 76457b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 76557b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 76657b952d6SSatish Balay for (k=0; k<bs; k++ ) { 767cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 768cee3aa6bSSatish Balay col++; a += bs; 76957b952d6SSatish Balay } 77057b952d6SSatish Balay } 77157b952d6SSatish Balay } 77257b952d6SSatish Balay /* copy over the B part */ 77357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 77457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 77557b952d6SSatish Balay row = baij->rstart*bs; 77657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 77757b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 77857b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 77957b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 78057b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 78157b952d6SSatish Balay for (k=0; k<bs; k++ ) { 782cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 783cee3aa6bSSatish Balay col++; a += bs; 78457b952d6SSatish Balay } 78557b952d6SSatish Balay } 78657b952d6SSatish Balay } 78757b952d6SSatish Balay PetscFree(rvals); 7886d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7896d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 79057b952d6SSatish Balay if (!rank) { 79157b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 79257b952d6SSatish Balay } 79357b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 79457b952d6SSatish Balay } 79557b952d6SSatish Balay } 79657b952d6SSatish Balay return 0; 79757b952d6SSatish Balay } 79857b952d6SSatish Balay 79957b952d6SSatish Balay 80057b952d6SSatish Balay 8015615d1e5SSatish Balay #undef __FUNC__ 8025615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 803ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 80457b952d6SSatish Balay { 80557b952d6SSatish Balay Mat mat = (Mat) obj; 80657b952d6SSatish Balay int ierr; 80757b952d6SSatish Balay ViewerType vtype; 80857b952d6SSatish Balay 80957b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 81057b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 81157b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 81257b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 81357b952d6SSatish Balay } 81457b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 81557b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 81657b952d6SSatish Balay } 81757b952d6SSatish Balay return 0; 81857b952d6SSatish Balay } 81957b952d6SSatish Balay 8205615d1e5SSatish Balay #undef __FUNC__ 8215615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 822ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 82379bdfe76SSatish Balay { 82479bdfe76SSatish Balay Mat mat = (Mat) obj; 82579bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 82679bdfe76SSatish Balay int ierr; 82779bdfe76SSatish Balay 82879bdfe76SSatish Balay #if defined(PETSC_LOG) 82979bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 83079bdfe76SSatish Balay #endif 83179bdfe76SSatish Balay 83279bdfe76SSatish Balay PetscFree(baij->rowners); 83379bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 83479bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 83579bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 83679bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 83779bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 83879bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 83979bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 84030793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 84179bdfe76SSatish Balay PetscFree(baij); 84290f02eecSBarry Smith if (mat->mapping) { 84390f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 84490f02eecSBarry Smith } 84579bdfe76SSatish Balay PLogObjectDestroy(mat); 84679bdfe76SSatish Balay PetscHeaderDestroy(mat); 84779bdfe76SSatish Balay return 0; 84879bdfe76SSatish Balay } 84979bdfe76SSatish Balay 8505615d1e5SSatish Balay #undef __FUNC__ 8515615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 852ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 853cee3aa6bSSatish Balay { 854cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 85547b4a8eaSLois Curfman McInnes int ierr, nt; 856cee3aa6bSSatish Balay 857c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 85847b4a8eaSLois Curfman McInnes if (nt != a->n) { 859ab26458aSBarry Smith SETERRQ(1,0,"Incompatible partition of A and xx"); 86047b4a8eaSLois Curfman McInnes } 861c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 86247b4a8eaSLois Curfman McInnes if (nt != a->m) { 863e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 86447b4a8eaSLois Curfman McInnes } 86543a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 866cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 86743a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 868cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 86943a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 870cee3aa6bSSatish Balay return 0; 871cee3aa6bSSatish Balay } 872cee3aa6bSSatish Balay 8735615d1e5SSatish Balay #undef __FUNC__ 8745615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 875ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 876cee3aa6bSSatish Balay { 877cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 878cee3aa6bSSatish Balay int ierr; 87943a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 880cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 88143a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 882cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 883cee3aa6bSSatish Balay return 0; 884cee3aa6bSSatish Balay } 885cee3aa6bSSatish Balay 8865615d1e5SSatish Balay #undef __FUNC__ 8875615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 888ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 889cee3aa6bSSatish Balay { 890cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 891cee3aa6bSSatish Balay int ierr; 892cee3aa6bSSatish Balay 893cee3aa6bSSatish Balay /* do nondiagonal part */ 894cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 895cee3aa6bSSatish Balay /* send it on its way */ 896537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 897cee3aa6bSSatish Balay /* do local part */ 898cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 899cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 900cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 901cee3aa6bSSatish Balay /* but is not perhaps always true. */ 902639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 903cee3aa6bSSatish Balay return 0; 904cee3aa6bSSatish Balay } 905cee3aa6bSSatish Balay 9065615d1e5SSatish Balay #undef __FUNC__ 9075615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 908ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 909cee3aa6bSSatish Balay { 910cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 911cee3aa6bSSatish Balay int ierr; 912cee3aa6bSSatish Balay 913cee3aa6bSSatish Balay /* do nondiagonal part */ 914cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 915cee3aa6bSSatish Balay /* send it on its way */ 916537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 917cee3aa6bSSatish Balay /* do local part */ 918cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 919cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 920cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 921cee3aa6bSSatish Balay /* but is not perhaps always true. */ 922537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 923cee3aa6bSSatish Balay return 0; 924cee3aa6bSSatish Balay } 925cee3aa6bSSatish Balay 926cee3aa6bSSatish Balay /* 927cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 928cee3aa6bSSatish Balay diagonal block 929cee3aa6bSSatish Balay */ 9305615d1e5SSatish Balay #undef __FUNC__ 9315615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 932ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 933cee3aa6bSSatish Balay { 934cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 935cee3aa6bSSatish Balay if (a->M != a->N) 936e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 937cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 938cee3aa6bSSatish Balay } 939cee3aa6bSSatish Balay 9405615d1e5SSatish Balay #undef __FUNC__ 9415615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 942ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 943cee3aa6bSSatish Balay { 944cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 945cee3aa6bSSatish Balay int ierr; 946cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 947cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 948cee3aa6bSSatish Balay return 0; 949cee3aa6bSSatish Balay } 950026e39d0SSatish Balay 9515615d1e5SSatish Balay #undef __FUNC__ 9525615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 953ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 95457b952d6SSatish Balay { 95557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 95657b952d6SSatish Balay *m = mat->M; *n = mat->N; 95757b952d6SSatish Balay return 0; 95857b952d6SSatish Balay } 95957b952d6SSatish Balay 9605615d1e5SSatish Balay #undef __FUNC__ 9615615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 962ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 96357b952d6SSatish Balay { 96457b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 96557b952d6SSatish Balay *m = mat->m; *n = mat->N; 96657b952d6SSatish Balay return 0; 96757b952d6SSatish Balay } 96857b952d6SSatish Balay 9695615d1e5SSatish Balay #undef __FUNC__ 9705615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 971ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 97257b952d6SSatish Balay { 97357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 97457b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 97557b952d6SSatish Balay return 0; 97657b952d6SSatish Balay } 97757b952d6SSatish Balay 978acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 979acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 980acdf5bf4SSatish Balay 9815615d1e5SSatish Balay #undef __FUNC__ 9825615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 983acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 984acdf5bf4SSatish Balay { 985acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 986acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 987acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 988d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 989d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 990acdf5bf4SSatish Balay 991e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 992acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 993acdf5bf4SSatish Balay 994acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 995acdf5bf4SSatish Balay /* 996acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 997acdf5bf4SSatish Balay */ 998acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 999bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1000bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1001acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1002acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1003acdf5bf4SSatish Balay } 1004acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1005acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1006acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1007acdf5bf4SSatish Balay } 1008acdf5bf4SSatish Balay 1009acdf5bf4SSatish Balay 1010e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 1011d9d09a02SSatish Balay lrow = row - brstart; 1012acdf5bf4SSatish Balay 1013acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1014acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1015acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1016acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1017acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1018acdf5bf4SSatish Balay nztot = nzA + nzB; 1019acdf5bf4SSatish Balay 1020acdf5bf4SSatish Balay cmap = mat->garray; 1021acdf5bf4SSatish Balay if (v || idx) { 1022acdf5bf4SSatish Balay if (nztot) { 1023acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1024acdf5bf4SSatish Balay int imark = -1; 1025acdf5bf4SSatish Balay if (v) { 1026acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1027acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1028d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1029acdf5bf4SSatish Balay else break; 1030acdf5bf4SSatish Balay } 1031acdf5bf4SSatish Balay imark = i; 1032acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1033acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1034acdf5bf4SSatish Balay } 1035acdf5bf4SSatish Balay if (idx) { 1036acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1037acdf5bf4SSatish Balay if (imark > -1) { 1038acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1039bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1040acdf5bf4SSatish Balay } 1041acdf5bf4SSatish Balay } else { 1042acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1043d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1044d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1045acdf5bf4SSatish Balay else break; 1046acdf5bf4SSatish Balay } 1047acdf5bf4SSatish Balay imark = i; 1048acdf5bf4SSatish Balay } 1049d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1050d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1051acdf5bf4SSatish Balay } 1052acdf5bf4SSatish Balay } 1053d212a18eSSatish Balay else { 1054d212a18eSSatish Balay if (idx) *idx = 0; 1055d212a18eSSatish Balay if (v) *v = 0; 1056d212a18eSSatish Balay } 1057acdf5bf4SSatish Balay } 1058acdf5bf4SSatish Balay *nz = nztot; 1059acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1060acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1061acdf5bf4SSatish Balay return 0; 1062acdf5bf4SSatish Balay } 1063acdf5bf4SSatish Balay 10645615d1e5SSatish Balay #undef __FUNC__ 10655615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1066acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1067acdf5bf4SSatish Balay { 1068acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1069acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1070e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 1071acdf5bf4SSatish Balay } 1072acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 1073acdf5bf4SSatish Balay return 0; 1074acdf5bf4SSatish Balay } 1075acdf5bf4SSatish Balay 10765615d1e5SSatish Balay #undef __FUNC__ 10775615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1078ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 10795a838052SSatish Balay { 10805a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 10815a838052SSatish Balay *bs = baij->bs; 10825a838052SSatish Balay return 0; 10835a838052SSatish Balay } 10845a838052SSatish Balay 10855615d1e5SSatish Balay #undef __FUNC__ 10865615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1087ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 108858667388SSatish Balay { 108958667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 109058667388SSatish Balay int ierr; 109158667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 109258667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 109358667388SSatish Balay return 0; 109458667388SSatish Balay } 10950ac07820SSatish Balay 10965615d1e5SSatish Balay #undef __FUNC__ 10975615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1098ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 10990ac07820SSatish Balay { 11004e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 11014e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 11027d57db60SLois Curfman McInnes int ierr; 11037d57db60SLois Curfman McInnes double isend[5], irecv[5]; 11040ac07820SSatish Balay 11054e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 11064e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 11074e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 11084e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 11094e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 11104e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 11114e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 11124e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 11134e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 11140ac07820SSatish Balay if (flag == MAT_LOCAL) { 11154e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 11164e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 11174e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11184e220ebcSLois Curfman McInnes info->memory = isend[3]; 11194e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 11200ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1121dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 11224e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11234e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11244e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11254e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11264e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11270ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1128dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 11294e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11304e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11314e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11324e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11334e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11340ac07820SSatish Balay } 11354e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 11364e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 11374e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 11380ac07820SSatish Balay return 0; 11390ac07820SSatish Balay } 11400ac07820SSatish Balay 11415615d1e5SSatish Balay #undef __FUNC__ 11425615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1143ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 114458667388SSatish Balay { 114558667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 114658667388SSatish Balay 114758667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 114858667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 11496da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1150c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 115196854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1152c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1153b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1154b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1155b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1156aeafbbfcSLois Curfman McInnes a->roworiented = 1; 115758667388SSatish Balay MatSetOption(a->A,op); 115858667388SSatish Balay MatSetOption(a->B,op); 1159b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 11606da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 116158667388SSatish Balay op == MAT_SYMMETRIC || 116258667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 116358667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 116458667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 116558667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 116658667388SSatish Balay a->roworiented = 0; 116758667388SSatish Balay MatSetOption(a->A,op); 116858667388SSatish Balay MatSetOption(a->B,op); 11692b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 117090f02eecSBarry Smith a->donotstash = 1; 117190f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1172e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 117358667388SSatish Balay else 1174e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 117558667388SSatish Balay return 0; 117658667388SSatish Balay } 117758667388SSatish Balay 11785615d1e5SSatish Balay #undef __FUNC__ 11795615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1180ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 11810ac07820SSatish Balay { 11820ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 11830ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 11840ac07820SSatish Balay Mat B; 11850ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 11860ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 11870ac07820SSatish Balay Scalar *a; 11880ac07820SSatish Balay 11890ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 1190e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 11910ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 11920ac07820SSatish Balay CHKERRQ(ierr); 11930ac07820SSatish Balay 11940ac07820SSatish Balay /* copy over the A part */ 11950ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 11960ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 11970ac07820SSatish Balay row = baij->rstart; 11980ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 11990ac07820SSatish Balay 12000ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12010ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12020ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12030ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12040ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 12050ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12060ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12070ac07820SSatish Balay col++; a += bs; 12080ac07820SSatish Balay } 12090ac07820SSatish Balay } 12100ac07820SSatish Balay } 12110ac07820SSatish Balay /* copy over the B part */ 12120ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 12130ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12140ac07820SSatish Balay row = baij->rstart*bs; 12150ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12160ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12170ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12180ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12190ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 12200ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12210ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12220ac07820SSatish Balay col++; a += bs; 12230ac07820SSatish Balay } 12240ac07820SSatish Balay } 12250ac07820SSatish Balay } 12260ac07820SSatish Balay PetscFree(rvals); 12270ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12280ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12290ac07820SSatish Balay 12300ac07820SSatish Balay if (matout != PETSC_NULL) { 12310ac07820SSatish Balay *matout = B; 12320ac07820SSatish Balay } else { 12330ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 12340ac07820SSatish Balay PetscFree(baij->rowners); 12350ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 12360ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 12370ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 12380ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 12390ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 12400ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 12410ac07820SSatish Balay PetscFree(baij); 1242*f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 12430ac07820SSatish Balay PetscHeaderDestroy(B); 12440ac07820SSatish Balay } 12450ac07820SSatish Balay return 0; 12460ac07820SSatish Balay } 12470e95ebc0SSatish Balay 12485615d1e5SSatish Balay #undef __FUNC__ 12495615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 12500e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 12510e95ebc0SSatish Balay { 12520e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 12530e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 12540e95ebc0SSatish Balay int ierr,s1,s2,s3; 12550e95ebc0SSatish Balay 12560e95ebc0SSatish Balay if (ll) { 12570e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 12580e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1259e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 12600e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 12610e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 12620e95ebc0SSatish Balay } 1263e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 12640e95ebc0SSatish Balay return 0; 12650e95ebc0SSatish Balay } 12660e95ebc0SSatish Balay 12670ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 12680ac07820SSatish Balay matrix is square and the column and row owerships are identical. 12690ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 12700ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 12710ac07820SSatish Balay routine. 12720ac07820SSatish Balay */ 12735615d1e5SSatish Balay #undef __FUNC__ 12745615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1275ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 12760ac07820SSatish Balay { 12770ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 12780ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 12790ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 12800ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 12810ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 12820ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 12830ac07820SSatish Balay MPI_Comm comm = A->comm; 12840ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 12850ac07820SSatish Balay MPI_Status recv_status,*send_status; 12860ac07820SSatish Balay IS istmp; 12870ac07820SSatish Balay 12880ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 12890ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 12900ac07820SSatish Balay 12910ac07820SSatish Balay /* first count number of contributors to each processor */ 12920ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 12930ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 12940ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 12950ac07820SSatish Balay for ( i=0; i<N; i++ ) { 12960ac07820SSatish Balay idx = rows[i]; 12970ac07820SSatish Balay found = 0; 12980ac07820SSatish Balay for ( j=0; j<size; j++ ) { 12990ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 13000ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 13010ac07820SSatish Balay } 13020ac07820SSatish Balay } 1303e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 13040ac07820SSatish Balay } 13050ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 13060ac07820SSatish Balay 13070ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 13080ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 13090ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 13100ac07820SSatish Balay nrecvs = work[rank]; 13110ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 13120ac07820SSatish Balay nmax = work[rank]; 13130ac07820SSatish Balay PetscFree(work); 13140ac07820SSatish Balay 13150ac07820SSatish Balay /* post receives: */ 13160ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 13170ac07820SSatish Balay CHKPTRQ(rvalues); 13180ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 13190ac07820SSatish Balay CHKPTRQ(recv_waits); 13200ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13210ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 13220ac07820SSatish Balay } 13230ac07820SSatish Balay 13240ac07820SSatish Balay /* do sends: 13250ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 13260ac07820SSatish Balay the ith processor 13270ac07820SSatish Balay */ 13280ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 13290ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 13300ac07820SSatish Balay CHKPTRQ(send_waits); 13310ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 13320ac07820SSatish Balay starts[0] = 0; 13330ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13340ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13350ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 13360ac07820SSatish Balay } 13370ac07820SSatish Balay ISRestoreIndices(is,&rows); 13380ac07820SSatish Balay 13390ac07820SSatish Balay starts[0] = 0; 13400ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13410ac07820SSatish Balay count = 0; 13420ac07820SSatish Balay for ( i=0; i<size; i++ ) { 13430ac07820SSatish Balay if (procs[i]) { 13440ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 13450ac07820SSatish Balay } 13460ac07820SSatish Balay } 13470ac07820SSatish Balay PetscFree(starts); 13480ac07820SSatish Balay 13490ac07820SSatish Balay base = owners[rank]*bs; 13500ac07820SSatish Balay 13510ac07820SSatish Balay /* wait on receives */ 13520ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 13530ac07820SSatish Balay source = lens + nrecvs; 13540ac07820SSatish Balay count = nrecvs; slen = 0; 13550ac07820SSatish Balay while (count) { 13560ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 13570ac07820SSatish Balay /* unpack receives into our local space */ 13580ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 13590ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 13600ac07820SSatish Balay lens[imdex] = n; 13610ac07820SSatish Balay slen += n; 13620ac07820SSatish Balay count--; 13630ac07820SSatish Balay } 13640ac07820SSatish Balay PetscFree(recv_waits); 13650ac07820SSatish Balay 13660ac07820SSatish Balay /* move the data into the send scatter */ 13670ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 13680ac07820SSatish Balay count = 0; 13690ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13700ac07820SSatish Balay values = rvalues + i*nmax; 13710ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 13720ac07820SSatish Balay lrows[count++] = values[j] - base; 13730ac07820SSatish Balay } 13740ac07820SSatish Balay } 13750ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 13760ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 13770ac07820SSatish Balay 13780ac07820SSatish Balay /* actually zap the local rows */ 1379029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 13800ac07820SSatish Balay PLogObjectParent(A,istmp); 13810ac07820SSatish Balay PetscFree(lrows); 13820ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 13830ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 13840ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 13850ac07820SSatish Balay 13860ac07820SSatish Balay /* wait on sends */ 13870ac07820SSatish Balay if (nsends) { 13880ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 13890ac07820SSatish Balay CHKPTRQ(send_status); 13900ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 13910ac07820SSatish Balay PetscFree(send_status); 13920ac07820SSatish Balay } 13930ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 13940ac07820SSatish Balay 13950ac07820SSatish Balay return 0; 13960ac07820SSatish Balay } 1397ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 13985615d1e5SSatish Balay #undef __FUNC__ 13995615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1400ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1401ba4ca20aSSatish Balay { 1402ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1403ba4ca20aSSatish Balay 1404ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1405ba4ca20aSSatish Balay else return 0; 1406ba4ca20aSSatish Balay } 14070ac07820SSatish Balay 14085615d1e5SSatish Balay #undef __FUNC__ 14095615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1410ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1411bb5a7306SBarry Smith { 1412bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1413bb5a7306SBarry Smith int ierr; 1414bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1415bb5a7306SBarry Smith return 0; 1416bb5a7306SBarry Smith } 1417bb5a7306SBarry Smith 14180ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 14190ac07820SSatish Balay 142079bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 142179bdfe76SSatish Balay static struct _MatOps MatOps = { 1422bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 14234c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 14244c50302cSBarry Smith 0,0,0,0, 14250ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 14260e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 142758667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 14284c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 14294c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 14304c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 143194a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1432d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1433ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1434bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1435ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 143679bdfe76SSatish Balay 143779bdfe76SSatish Balay 14385615d1e5SSatish Balay #undef __FUNC__ 14395615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 144079bdfe76SSatish Balay /*@C 144179bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 144279bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 144379bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 144479bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 144579bdfe76SSatish Balay performance can be increased by more than a factor of 50. 144679bdfe76SSatish Balay 144779bdfe76SSatish Balay Input Parameters: 144879bdfe76SSatish Balay . comm - MPI communicator 144979bdfe76SSatish Balay . bs - size of blockk 145079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 145192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 145292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 145392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 145492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 145592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 145679bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 145792e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 145879bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 145979bdfe76SSatish Balay submatrix (same for all local rows) 146092e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 146192e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 146292e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 146392e8d321SLois Curfman McInnes it is zero. 146492e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 146579bdfe76SSatish Balay submatrix (same for all local rows). 146692e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 146792e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 146892e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 146979bdfe76SSatish Balay 147079bdfe76SSatish Balay Output Parameter: 147179bdfe76SSatish Balay . A - the matrix 147279bdfe76SSatish Balay 147379bdfe76SSatish Balay Notes: 147479bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 147579bdfe76SSatish Balay (possibly both). 147679bdfe76SSatish Balay 147779bdfe76SSatish Balay Storage Information: 147879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 147979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 148079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 148179bdfe76SSatish Balay local matrix (a rectangular submatrix). 148279bdfe76SSatish Balay 148379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 148479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 148579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 148679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 148779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 148879bdfe76SSatish Balay 148979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 149079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 149179bdfe76SSatish Balay 149279bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 149379bdfe76SSatish Balay $ ------------------- 149479bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 149579bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 149679bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 149779bdfe76SSatish Balay $ ------------------- 149879bdfe76SSatish Balay $ 149979bdfe76SSatish Balay 150079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 150179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 150279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 150357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 150479bdfe76SSatish Balay 150579bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 150679bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 150779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 150892e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 150992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15106da5968aSLois Curfman McInnes matrices. 151179bdfe76SSatish Balay 151292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 151379bdfe76SSatish Balay 151479bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 151579bdfe76SSatish Balay @*/ 151679bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 151779bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 151879bdfe76SSatish Balay { 151979bdfe76SSatish Balay Mat B; 152079bdfe76SSatish Balay Mat_MPIBAIJ *b; 15214c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 152279bdfe76SSatish Balay 1523e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 152479bdfe76SSatish Balay *A = 0; 1525*f09e8eb9SSatish Balay PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 152679bdfe76SSatish Balay PLogObjectCreate(B); 152779bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 152879bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 152979bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 15304c50302cSBarry Smith MPI_Comm_size(comm,&size); 15314c50302cSBarry Smith if (size == 1) { 15324c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 15334c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 15344c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 15354c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 15364c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 15374c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 15384c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 15394c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 15404c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 15414c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 15424c50302cSBarry Smith } 15434c50302cSBarry Smith 154479bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 154579bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 154690f02eecSBarry Smith B->mapping = 0; 154779bdfe76SSatish Balay B->factor = 0; 154879bdfe76SSatish Balay B->assembled = PETSC_FALSE; 154979bdfe76SSatish Balay 1550e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 155179bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 155279bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 155379bdfe76SSatish Balay 155479bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1555e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1556e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1557e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1558cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1559cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 156079bdfe76SSatish Balay 156179bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 156279bdfe76SSatish Balay work[0] = m; work[1] = n; 156379bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 156479bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 156579bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 156679bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 156779bdfe76SSatish Balay } 156879bdfe76SSatish Balay if (m == PETSC_DECIDE) { 156979bdfe76SSatish Balay Mbs = M/bs; 1570e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 157179bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 157279bdfe76SSatish Balay m = mbs*bs; 157379bdfe76SSatish Balay } 157479bdfe76SSatish Balay if (n == PETSC_DECIDE) { 157579bdfe76SSatish Balay Nbs = N/bs; 1576e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 157779bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 157879bdfe76SSatish Balay n = nbs*bs; 157979bdfe76SSatish Balay } 1580e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 158179bdfe76SSatish Balay 158279bdfe76SSatish Balay b->m = m; B->m = m; 158379bdfe76SSatish Balay b->n = n; B->n = n; 158479bdfe76SSatish Balay b->N = N; B->N = N; 158579bdfe76SSatish Balay b->M = M; B->M = M; 158679bdfe76SSatish Balay b->bs = bs; 158779bdfe76SSatish Balay b->bs2 = bs*bs; 158879bdfe76SSatish Balay b->mbs = mbs; 158979bdfe76SSatish Balay b->nbs = nbs; 159079bdfe76SSatish Balay b->Mbs = Mbs; 159179bdfe76SSatish Balay b->Nbs = Nbs; 159279bdfe76SSatish Balay 159379bdfe76SSatish Balay /* build local table of row and column ownerships */ 159479bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1595*f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 15960ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 159779bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 159879bdfe76SSatish Balay b->rowners[0] = 0; 159979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 160079bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 160179bdfe76SSatish Balay } 160279bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 160379bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 16044fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 16054fa0d573SSatish Balay b->rend_bs = b->rend * bs; 16064fa0d573SSatish Balay 160779bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 160879bdfe76SSatish Balay b->cowners[0] = 0; 160979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 161079bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 161179bdfe76SSatish Balay } 161279bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 161379bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 16144fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 16154fa0d573SSatish Balay b->cend_bs = b->cend * bs; 161679bdfe76SSatish Balay 161779bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1618029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 161979bdfe76SSatish Balay PLogObjectParent(B,b->A); 162079bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1621029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 162279bdfe76SSatish Balay PLogObjectParent(B,b->B); 162379bdfe76SSatish Balay 162479bdfe76SSatish Balay /* build cache for off array entries formed */ 162579bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 162690f02eecSBarry Smith b->donotstash = 0; 162779bdfe76SSatish Balay b->colmap = 0; 162879bdfe76SSatish Balay b->garray = 0; 162979bdfe76SSatish Balay b->roworiented = 1; 163079bdfe76SSatish Balay 163130793edcSSatish Balay /* stuff used in block assembly */ 163230793edcSSatish Balay b->barray = 0; 163330793edcSSatish Balay 163479bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 163579bdfe76SSatish Balay b->lvec = 0; 163679bdfe76SSatish Balay b->Mvctx = 0; 163779bdfe76SSatish Balay 163879bdfe76SSatish Balay /* stuff for MatGetRow() */ 163979bdfe76SSatish Balay b->rowindices = 0; 164079bdfe76SSatish Balay b->rowvalues = 0; 164179bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 164279bdfe76SSatish Balay 164379bdfe76SSatish Balay *A = B; 164479bdfe76SSatish Balay return 0; 164579bdfe76SSatish Balay } 1646026e39d0SSatish Balay 16475615d1e5SSatish Balay #undef __FUNC__ 16485615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 16490ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 16500ac07820SSatish Balay { 16510ac07820SSatish Balay Mat mat; 16520ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 16530ac07820SSatish Balay int ierr, len=0, flg; 16540ac07820SSatish Balay 16550ac07820SSatish Balay *newmat = 0; 1656*f09e8eb9SSatish Balay PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 16570ac07820SSatish Balay PLogObjectCreate(mat); 16580ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 16590ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 16600ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 16610ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 16620ac07820SSatish Balay mat->factor = matin->factor; 16630ac07820SSatish Balay mat->assembled = PETSC_TRUE; 16640ac07820SSatish Balay 16650ac07820SSatish Balay a->m = mat->m = oldmat->m; 16660ac07820SSatish Balay a->n = mat->n = oldmat->n; 16670ac07820SSatish Balay a->M = mat->M = oldmat->M; 16680ac07820SSatish Balay a->N = mat->N = oldmat->N; 16690ac07820SSatish Balay 16700ac07820SSatish Balay a->bs = oldmat->bs; 16710ac07820SSatish Balay a->bs2 = oldmat->bs2; 16720ac07820SSatish Balay a->mbs = oldmat->mbs; 16730ac07820SSatish Balay a->nbs = oldmat->nbs; 16740ac07820SSatish Balay a->Mbs = oldmat->Mbs; 16750ac07820SSatish Balay a->Nbs = oldmat->Nbs; 16760ac07820SSatish Balay 16770ac07820SSatish Balay a->rstart = oldmat->rstart; 16780ac07820SSatish Balay a->rend = oldmat->rend; 16790ac07820SSatish Balay a->cstart = oldmat->cstart; 16800ac07820SSatish Balay a->cend = oldmat->cend; 16810ac07820SSatish Balay a->size = oldmat->size; 16820ac07820SSatish Balay a->rank = oldmat->rank; 1683e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 16840ac07820SSatish Balay a->rowvalues = 0; 16850ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 168630793edcSSatish Balay a->barray = 0; 16870ac07820SSatish Balay 16880ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1689*f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 16900ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 16910ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 16920ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16930ac07820SSatish Balay if (oldmat->colmap) { 16940ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 16950ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 16960ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 16970ac07820SSatish Balay } else a->colmap = 0; 16980ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 16990ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 17000ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 17010ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 17020ac07820SSatish Balay } else a->garray = 0; 17030ac07820SSatish Balay 17040ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 17050ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 17060ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 17070ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 17080ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 17090ac07820SSatish Balay PLogObjectParent(mat,a->A); 17100ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 17110ac07820SSatish Balay PLogObjectParent(mat,a->B); 17120ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 17130ac07820SSatish Balay if (flg) { 17140ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 17150ac07820SSatish Balay } 17160ac07820SSatish Balay *newmat = mat; 17170ac07820SSatish Balay return 0; 17180ac07820SSatish Balay } 171957b952d6SSatish Balay 172057b952d6SSatish Balay #include "sys.h" 172157b952d6SSatish Balay 17225615d1e5SSatish Balay #undef __FUNC__ 17235615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 172457b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 172557b952d6SSatish Balay { 172657b952d6SSatish Balay Mat A; 172757b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 172857b952d6SSatish Balay Scalar *vals,*buf; 172957b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 173057b952d6SSatish Balay MPI_Status status; 1731cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 173257b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 173357b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 173457b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 173557b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 173657b952d6SSatish Balay 173757b952d6SSatish Balay 173857b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 173957b952d6SSatish Balay bs2 = bs*bs; 174057b952d6SSatish Balay 174157b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 174257b952d6SSatish Balay if (!rank) { 174357b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 174457b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1745e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 174657b952d6SSatish Balay } 174757b952d6SSatish Balay 174857b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 174957b952d6SSatish Balay M = header[1]; N = header[2]; 175057b952d6SSatish Balay 1751e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 175257b952d6SSatish Balay 175357b952d6SSatish Balay /* 175457b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 175557b952d6SSatish Balay divisible by the blocksize 175657b952d6SSatish Balay */ 175757b952d6SSatish Balay Mbs = M/bs; 175857b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 175957b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 176057b952d6SSatish Balay else Mbs++; 176157b952d6SSatish Balay if (extra_rows &&!rank) { 1762b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 176357b952d6SSatish Balay } 1764537820f0SBarry Smith 176557b952d6SSatish Balay /* determine ownership of all rows */ 176657b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 176757b952d6SSatish Balay m = mbs * bs; 1768cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1769cee3aa6bSSatish Balay browners = rowners + size + 1; 177057b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 177157b952d6SSatish Balay rowners[0] = 0; 1772cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1773cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 177457b952d6SSatish Balay rstart = rowners[rank]; 177557b952d6SSatish Balay rend = rowners[rank+1]; 177657b952d6SSatish Balay 177757b952d6SSatish Balay /* distribute row lengths to all processors */ 177857b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 177957b952d6SSatish Balay if (!rank) { 178057b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 178157b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 178257b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 178357b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1784cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1785cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 178657b952d6SSatish Balay PetscFree(sndcounts); 178757b952d6SSatish Balay } 178857b952d6SSatish Balay else { 178957b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 179057b952d6SSatish Balay } 179157b952d6SSatish Balay 179257b952d6SSatish Balay if (!rank) { 179357b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 179457b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 179557b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 179657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 179757b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 179857b952d6SSatish Balay procsnz[i] += rowlengths[j]; 179957b952d6SSatish Balay } 180057b952d6SSatish Balay } 180157b952d6SSatish Balay PetscFree(rowlengths); 180257b952d6SSatish Balay 180357b952d6SSatish Balay /* determine max buffer needed and allocate it */ 180457b952d6SSatish Balay maxnz = 0; 180557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 180657b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 180757b952d6SSatish Balay } 180857b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 180957b952d6SSatish Balay 181057b952d6SSatish Balay /* read in my part of the matrix column indices */ 181157b952d6SSatish Balay nz = procsnz[0]; 181257b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 181357b952d6SSatish Balay mycols = ibuf; 1814cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 181557b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1816cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1817cee3aa6bSSatish Balay 181857b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 181957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 182057b952d6SSatish Balay nz = procsnz[i]; 182157b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 182257b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 182357b952d6SSatish Balay } 182457b952d6SSatish Balay /* read in the stuff for the last proc */ 182557b952d6SSatish Balay if ( size != 1 ) { 182657b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 182757b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 182857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1829cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 183057b952d6SSatish Balay } 183157b952d6SSatish Balay PetscFree(cols); 183257b952d6SSatish Balay } 183357b952d6SSatish Balay else { 183457b952d6SSatish Balay /* determine buffer space needed for message */ 183557b952d6SSatish Balay nz = 0; 183657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 183757b952d6SSatish Balay nz += locrowlens[i]; 183857b952d6SSatish Balay } 183957b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 184057b952d6SSatish Balay mycols = ibuf; 184157b952d6SSatish Balay /* receive message of column indices*/ 184257b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 184357b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1844e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 184557b952d6SSatish Balay } 184657b952d6SSatish Balay 184757b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1848cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1849cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 185057b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1851cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 185257b952d6SSatish Balay masked1 = mask + Mbs; 185357b952d6SSatish Balay masked2 = masked1 + Mbs; 185457b952d6SSatish Balay rowcount = 0; nzcount = 0; 185557b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 185657b952d6SSatish Balay dcount = 0; 185757b952d6SSatish Balay odcount = 0; 185857b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 185957b952d6SSatish Balay kmax = locrowlens[rowcount]; 186057b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 186157b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 186257b952d6SSatish Balay if (!mask[tmp]) { 186357b952d6SSatish Balay mask[tmp] = 1; 186457b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 186557b952d6SSatish Balay else masked1[dcount++] = tmp; 186657b952d6SSatish Balay } 186757b952d6SSatish Balay } 186857b952d6SSatish Balay rowcount++; 186957b952d6SSatish Balay } 1870cee3aa6bSSatish Balay 187157b952d6SSatish Balay dlens[i] = dcount; 187257b952d6SSatish Balay odlens[i] = odcount; 1873cee3aa6bSSatish Balay 187457b952d6SSatish Balay /* zero out the mask elements we set */ 187557b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 187657b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 187757b952d6SSatish Balay } 1878cee3aa6bSSatish Balay 187957b952d6SSatish Balay /* create our matrix */ 1880537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1881537820f0SBarry Smith CHKERRQ(ierr); 188257b952d6SSatish Balay A = *newmat; 18836d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 188457b952d6SSatish Balay 188557b952d6SSatish Balay if (!rank) { 188657b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 188757b952d6SSatish Balay /* read in my part of the matrix numerical values */ 188857b952d6SSatish Balay nz = procsnz[0]; 188957b952d6SSatish Balay vals = buf; 1890cee3aa6bSSatish Balay mycols = ibuf; 1891cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 189257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1893cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1894537820f0SBarry Smith 189557b952d6SSatish Balay /* insert into matrix */ 189657b952d6SSatish Balay jj = rstart*bs; 189757b952d6SSatish Balay for ( i=0; i<m; i++ ) { 189857b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 189957b952d6SSatish Balay mycols += locrowlens[i]; 190057b952d6SSatish Balay vals += locrowlens[i]; 190157b952d6SSatish Balay jj++; 190257b952d6SSatish Balay } 190357b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 190457b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 190557b952d6SSatish Balay nz = procsnz[i]; 190657b952d6SSatish Balay vals = buf; 190757b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 190857b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 190957b952d6SSatish Balay } 191057b952d6SSatish Balay /* the last proc */ 191157b952d6SSatish Balay if ( size != 1 ){ 191257b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1913cee3aa6bSSatish Balay vals = buf; 191457b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 191557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1916cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 191757b952d6SSatish Balay } 191857b952d6SSatish Balay PetscFree(procsnz); 191957b952d6SSatish Balay } 192057b952d6SSatish Balay else { 192157b952d6SSatish Balay /* receive numeric values */ 192257b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 192357b952d6SSatish Balay 192457b952d6SSatish Balay /* receive message of values*/ 192557b952d6SSatish Balay vals = buf; 1926cee3aa6bSSatish Balay mycols = ibuf; 192757b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 192857b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1929e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 193057b952d6SSatish Balay 193157b952d6SSatish Balay /* insert into matrix */ 193257b952d6SSatish Balay jj = rstart*bs; 1933cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 193457b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 193557b952d6SSatish Balay mycols += locrowlens[i]; 193657b952d6SSatish Balay vals += locrowlens[i]; 193757b952d6SSatish Balay jj++; 193857b952d6SSatish Balay } 193957b952d6SSatish Balay } 194057b952d6SSatish Balay PetscFree(locrowlens); 194157b952d6SSatish Balay PetscFree(buf); 194257b952d6SSatish Balay PetscFree(ibuf); 194357b952d6SSatish Balay PetscFree(rowners); 194457b952d6SSatish Balay PetscFree(dlens); 1945cee3aa6bSSatish Balay PetscFree(mask); 19466d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19476d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 194857b952d6SSatish Balay return 0; 194957b952d6SSatish Balay } 195057b952d6SSatish Balay 195157b952d6SSatish Balay 1952