1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*596b8d2eSBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.75 1997/07/29 14:10:02 bsmith Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2257b952d6SSatish Balay 233b2fbd54SBarry Smith 24537820f0SBarry Smith /* 25537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2857b952d6SSatish Balay length of colmap equals the global matrix length. 2957b952d6SSatish Balay */ 305615d1e5SSatish Balay #undef __FUNC__ 315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3357b952d6SSatish Balay { 3457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3557b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 36928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3757b952d6SSatish Balay 38758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3957b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4057b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4257b952d6SSatish Balay return 0; 4357b952d6SSatish Balay } 4457b952d6SSatish Balay 455615d1e5SSatish Balay #undef __FUNC__ 465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 4957b952d6SSatish Balay { 503b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5157b952d6SSatish Balay int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 553b2fbd54SBarry Smith return 0; 563b2fbd54SBarry Smith } 573b2fbd54SBarry Smith 585615d1e5SSatish Balay #undef __FUNC__ 595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 613b2fbd54SBarry Smith PetscTruth *done) 623b2fbd54SBarry Smith { 633b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 643b2fbd54SBarry Smith int ierr; 653b2fbd54SBarry Smith if (aij->size == 1) { 663b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 67e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6857b952d6SSatish Balay return 0; 6957b952d6SSatish Balay } 7080c1aa95SSatish Balay #define CHUNKSIZE 10 7180c1aa95SSatish Balay 72f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 7380c1aa95SSatish Balay { \ 7480c1aa95SSatish Balay \ 7580c1aa95SSatish Balay brow = row/bs; \ 7680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 77ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 7880c1aa95SSatish Balay bcol = col/bs; \ 7980c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 80ab26458aSBarry Smith low = 0; high = nrow; \ 81ab26458aSBarry Smith while (high-low > 3) { \ 82ab26458aSBarry Smith t = (low+high)/2; \ 83ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 84ab26458aSBarry Smith else low = t; \ 85ab26458aSBarry Smith } \ 86ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 8780c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 8880c1aa95SSatish Balay if (rp[_i] == bcol) { \ 8980c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 90eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 91eada6651SSatish Balay else *bap = value; \ 92ac7a638eSSatish Balay goto a_noinsert; \ 9380c1aa95SSatish Balay } \ 9480c1aa95SSatish Balay } \ 9589280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 9611ebbc71SLois Curfman McInnes else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 9780c1aa95SSatish Balay if (nrow >= rmax) { \ 9880c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 9980c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 10080c1aa95SSatish Balay Scalar *new_a; \ 10180c1aa95SSatish Balay \ 10211ebbc71SLois Curfman McInnes if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 10389280ab3SLois Curfman McInnes \ 10480c1aa95SSatish Balay /* malloc new storage space */ \ 10580c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 10680c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 10780c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 10880c1aa95SSatish Balay new_i = new_j + new_nz; \ 10980c1aa95SSatish Balay \ 11080c1aa95SSatish Balay /* copy over old data into new slots */ \ 11180c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 11280c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 11380c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 11480c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 11580c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 11680c1aa95SSatish Balay len*sizeof(int)); \ 11780c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 11880c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 11980c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 12080c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 12180c1aa95SSatish Balay /* free up old matrix storage */ \ 12280c1aa95SSatish Balay PetscFree(a->a); \ 12380c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 12480c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 12580c1aa95SSatish Balay a->singlemalloc = 1; \ 12680c1aa95SSatish Balay \ 12780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 128ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 12980c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 13080c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 13180c1aa95SSatish Balay a->reallocs++; \ 13280c1aa95SSatish Balay a->nz++; \ 13380c1aa95SSatish Balay } \ 13480c1aa95SSatish Balay N = nrow++ - 1; \ 13580c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 13680c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 13780c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 13880c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 13980c1aa95SSatish Balay } \ 14080c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 14180c1aa95SSatish Balay rp[_i] = bcol; \ 14280c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 143ac7a638eSSatish Balay a_noinsert:; \ 14480c1aa95SSatish Balay ailen[brow] = nrow; \ 14580c1aa95SSatish Balay } 14657b952d6SSatish Balay 147ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 148ac7a638eSSatish Balay { \ 149ac7a638eSSatish Balay \ 150ac7a638eSSatish Balay brow = row/bs; \ 151ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 153ac7a638eSSatish Balay bcol = col/bs; \ 154ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 155ac7a638eSSatish Balay low = 0; high = nrow; \ 156ac7a638eSSatish Balay while (high-low > 3) { \ 157ac7a638eSSatish Balay t = (low+high)/2; \ 158ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 159ac7a638eSSatish Balay else low = t; \ 160ac7a638eSSatish Balay } \ 161ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 162ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 163ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 164ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 165ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 166ac7a638eSSatish Balay else *bap = value; \ 167ac7a638eSSatish Balay goto b_noinsert; \ 168ac7a638eSSatish Balay } \ 169ac7a638eSSatish Balay } \ 17089280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 17111ebbc71SLois Curfman McInnes else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 172ac7a638eSSatish Balay if (nrow >= rmax) { \ 173ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 17474c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 175ac7a638eSSatish Balay Scalar *new_a; \ 176ac7a638eSSatish Balay \ 17711ebbc71SLois Curfman McInnes if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 17889280ab3SLois Curfman McInnes \ 179ac7a638eSSatish Balay /* malloc new storage space */ \ 18074c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 181ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 182ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 183ac7a638eSSatish Balay new_i = new_j + new_nz; \ 184ac7a638eSSatish Balay \ 185ac7a638eSSatish Balay /* copy over old data into new slots */ \ 186ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 18774c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 188ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 189ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 190ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 191ac7a638eSSatish Balay len*sizeof(int)); \ 192ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 193ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 194ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 195ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 196ac7a638eSSatish Balay /* free up old matrix storage */ \ 19774c639caSSatish Balay PetscFree(b->a); \ 19874c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 19974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 20074c639caSSatish Balay b->singlemalloc = 1; \ 201ac7a638eSSatish Balay \ 202ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 203ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 20474c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 20574c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 20674c639caSSatish Balay b->reallocs++; \ 20774c639caSSatish Balay b->nz++; \ 208ac7a638eSSatish Balay } \ 209ac7a638eSSatish Balay N = nrow++ - 1; \ 210ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 211ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 212ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 213ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 214ac7a638eSSatish Balay } \ 215ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 216ac7a638eSSatish Balay rp[_i] = bcol; \ 217ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 218ac7a638eSSatish Balay b_noinsert:; \ 219ac7a638eSSatish Balay bilen[brow] = nrow; \ 220ac7a638eSSatish Balay } 221ac7a638eSSatish Balay 2225615d1e5SSatish Balay #undef __FUNC__ 2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 22557b952d6SSatish Balay { 22657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22757b952d6SSatish Balay Scalar value; 2284fa0d573SSatish Balay int ierr,i,j,row,col; 2294fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2304fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2314fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 23257b952d6SSatish Balay 233eada6651SSatish Balay /* Some Variables required in the macro */ 23480c1aa95SSatish Balay Mat A = baij->A; 23580c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 236ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 237ac7a638eSSatish Balay Scalar *aa=a->a; 238ac7a638eSSatish Balay 239ac7a638eSSatish Balay Mat B = baij->B; 240ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 241ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 242ac7a638eSSatish Balay Scalar *ba=b->a; 243ac7a638eSSatish Balay 244ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 245ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 246ac7a638eSSatish Balay Scalar *ap,*bap; 24780c1aa95SSatish Balay 24857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 250e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 251e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 252639f9d9dSBarry Smith #endif 25357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 25457b952d6SSatish Balay row = im[i] - rstart_orig; 25557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 25657b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 25757b952d6SSatish Balay col = in[j] - cstart_orig; 25857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 26080c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 26157b952d6SSatish Balay } 262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 263e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 264e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 265639f9d9dSBarry Smith #endif 26657b952d6SSatish Balay else { 26757b952d6SSatish Balay if (mat->was_assembled) { 268905e6a2fSBarry Smith if (!baij->colmap) { 269905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 270905e6a2fSBarry Smith } 271905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 27257b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 27357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 27457b952d6SSatish Balay col = in[j]; 2759bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2769bf004c3SSatish Balay B = baij->B; 2779bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2789bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2799bf004c3SSatish Balay ba=b->a; 28057b952d6SSatish Balay } 28157b952d6SSatish Balay } 28257b952d6SSatish Balay else col = in[j]; 28357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 284ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 285ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 28657b952d6SSatish Balay } 28757b952d6SSatish Balay } 28857b952d6SSatish Balay } 28957b952d6SSatish Balay else { 29090f02eecSBarry Smith if (roworiented && !baij->donotstash) { 29157b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 29257b952d6SSatish Balay } 29357b952d6SSatish Balay else { 29490f02eecSBarry Smith if (!baij->donotstash) { 29557b952d6SSatish Balay row = im[i]; 29657b952d6SSatish Balay for ( j=0; j<n; j++ ) { 29757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 29857b952d6SSatish Balay } 29957b952d6SSatish Balay } 30057b952d6SSatish Balay } 30157b952d6SSatish Balay } 30290f02eecSBarry Smith } 30357b952d6SSatish Balay return 0; 30457b952d6SSatish Balay } 30557b952d6SSatish Balay 306ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 307ab26458aSBarry Smith #undef __FUNC__ 308ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 309ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 310ab26458aSBarry Smith { 311ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 31230793edcSSatish Balay Scalar *value,*barray=baij->barray; 313abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 314ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 315ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 316ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 317ab26458aSBarry Smith 31830793edcSSatish Balay 31930793edcSSatish Balay if(!barray) { 32047513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 32130793edcSSatish Balay } 32230793edcSSatish Balay 323ab26458aSBarry Smith if (roworiented) { 324ab26458aSBarry Smith stepval = (n-1)*bs; 325ab26458aSBarry Smith } else { 326ab26458aSBarry Smith stepval = (m-1)*bs; 327ab26458aSBarry Smith } 328ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 329ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 330ab26458aSBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 331ab26458aSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 332ab26458aSBarry Smith #endif 333ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 334ab26458aSBarry Smith row = im[i] - rstart; 335ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 33615b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 33715b57d14SSatish Balay if ((roworiented) && (n == 1)) { 33815b57d14SSatish Balay barray = v + i*bs2; 33915b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 34015b57d14SSatish Balay barray = v + j*bs2; 34115b57d14SSatish Balay } else { /* Here a copy is required */ 342ab26458aSBarry Smith if (roworiented) { 343ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 344ab26458aSBarry Smith } else { 345ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 346abef11f7SSatish Balay } 34747513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 34847513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 34930793edcSSatish Balay *barray++ = *value++; 35047513183SBarry Smith } 35147513183SBarry Smith } 35230793edcSSatish Balay barray -=bs2; 35315b57d14SSatish Balay } 354abef11f7SSatish Balay 355abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 356abef11f7SSatish Balay col = in[j] - cstart; 35730793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 358ab26458aSBarry Smith } 359ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 360ab26458aSBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 36147513183SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Column too large");} 362ab26458aSBarry Smith #endif 363ab26458aSBarry Smith else { 364ab26458aSBarry Smith if (mat->was_assembled) { 365ab26458aSBarry Smith if (!baij->colmap) { 366ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 367ab26458aSBarry Smith } 368a5eb4965SSatish Balay 369a5eb4965SSatish Balay #if defined(PETSC_BOPT_g) 370a5eb4965SSatish Balay if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(1,0,"Incorrect colmap");} 371a5eb4965SSatish Balay #endif 372a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 373ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 374ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 375ab26458aSBarry Smith col = in[j]; 376ab26458aSBarry Smith } 377ab26458aSBarry Smith } 378ab26458aSBarry Smith else col = in[j]; 37930793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 380ab26458aSBarry Smith } 381ab26458aSBarry Smith } 382ab26458aSBarry Smith } 383ab26458aSBarry Smith else { 384ab26458aSBarry Smith if (!baij->donotstash) { 385ab26458aSBarry Smith if (roworiented ) { 386abef11f7SSatish Balay row = im[i]*bs; 387abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 388abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 389abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 390abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 391abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 392abef11f7SSatish Balay } 393ab26458aSBarry Smith } 394ab26458aSBarry Smith } 395ab26458aSBarry Smith } 396ab26458aSBarry Smith else { 397ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 398abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 399abef11f7SSatish Balay col = in[j]*bs; 400abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 401abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 402abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 403ab26458aSBarry Smith } 404ab26458aSBarry Smith } 405ab26458aSBarry Smith } 406abef11f7SSatish Balay } 407abef11f7SSatish Balay } 408ab26458aSBarry Smith } 409ab26458aSBarry Smith } 410ab26458aSBarry Smith return 0; 411ab26458aSBarry Smith } 412ab26458aSBarry Smith 4135615d1e5SSatish Balay #undef __FUNC__ 4145615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 415ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 416d6de1c52SSatish Balay { 417d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 418d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 419d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 420d6de1c52SSatish Balay 421d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 422e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 423e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 424d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 425d6de1c52SSatish Balay row = idxm[i] - bsrstart; 426d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 427e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 428e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 429d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 430d6de1c52SSatish Balay col = idxn[j] - bscstart; 431d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 432d6de1c52SSatish Balay } 433d6de1c52SSatish Balay else { 434905e6a2fSBarry Smith if (!baij->colmap) { 435905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 436905e6a2fSBarry Smith } 437e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 438dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 439d9d09a02SSatish Balay else { 440dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 441d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 442d6de1c52SSatish Balay } 443d6de1c52SSatish Balay } 444d6de1c52SSatish Balay } 445d9d09a02SSatish Balay } 446d6de1c52SSatish Balay else { 447e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 448d6de1c52SSatish Balay } 449d6de1c52SSatish Balay } 450d6de1c52SSatish Balay return 0; 451d6de1c52SSatish Balay } 452d6de1c52SSatish Balay 4535615d1e5SSatish Balay #undef __FUNC__ 4545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 455ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 456d6de1c52SSatish Balay { 457d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 458d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 459acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 460d6de1c52SSatish Balay double sum = 0.0; 461d6de1c52SSatish Balay Scalar *v; 462d6de1c52SSatish Balay 463d6de1c52SSatish Balay if (baij->size == 1) { 464d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 465d6de1c52SSatish Balay } else { 466d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 467d6de1c52SSatish Balay v = amat->a; 468d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 469d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 470d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 471d6de1c52SSatish Balay #else 472d6de1c52SSatish Balay sum += (*v)*(*v); v++; 473d6de1c52SSatish Balay #endif 474d6de1c52SSatish Balay } 475d6de1c52SSatish Balay v = bmat->a; 476d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 477d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 478d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 479d6de1c52SSatish Balay #else 480d6de1c52SSatish Balay sum += (*v)*(*v); v++; 481d6de1c52SSatish Balay #endif 482d6de1c52SSatish Balay } 483d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 484d6de1c52SSatish Balay *norm = sqrt(*norm); 485d6de1c52SSatish Balay } 486acdf5bf4SSatish Balay else 487e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 488d6de1c52SSatish Balay } 489d6de1c52SSatish Balay return 0; 490d6de1c52SSatish Balay } 49157b952d6SSatish Balay 4925615d1e5SSatish Balay #undef __FUNC__ 4935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 494ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 49557b952d6SSatish Balay { 49657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 49757b952d6SSatish Balay MPI_Comm comm = mat->comm; 49857b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 49957b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 50057b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 50157b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 50257b952d6SSatish Balay InsertMode addv; 50357b952d6SSatish Balay Scalar *rvalues,*svalues; 50457b952d6SSatish Balay 50557b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 506e0fa3b82SLois Curfman McInnes MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 50757b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 508e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 50957b952d6SSatish Balay } 510e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 51157b952d6SSatish Balay 51257b952d6SSatish Balay /* first count number of contributors to each processor */ 51357b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 51457b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 51557b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 51657b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 51757b952d6SSatish Balay idx = baij->stash.idx[i]; 51857b952d6SSatish Balay for ( j=0; j<size; j++ ) { 51957b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 52057b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 52157b952d6SSatish Balay } 52257b952d6SSatish Balay } 52357b952d6SSatish Balay } 52457b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 52557b952d6SSatish Balay 52657b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 52757b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 52857b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 52957b952d6SSatish Balay nreceives = work[rank]; 53057b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 53157b952d6SSatish Balay nmax = work[rank]; 53257b952d6SSatish Balay PetscFree(work); 53357b952d6SSatish Balay 53457b952d6SSatish Balay /* post receives: 53557b952d6SSatish Balay 1) each message will consist of ordered pairs 53657b952d6SSatish Balay (global index,value) we store the global index as a double 53757b952d6SSatish Balay to simplify the message passing. 53857b952d6SSatish Balay 2) since we don't know how long each individual message is we 53957b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 54057b952d6SSatish Balay this is a lot of wasted space. 54157b952d6SSatish Balay 54257b952d6SSatish Balay 54357b952d6SSatish Balay This could be done better. 54457b952d6SSatish Balay */ 54557b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 54657b952d6SSatish Balay CHKPTRQ(rvalues); 54757b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 54857b952d6SSatish Balay CHKPTRQ(recv_waits); 54957b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 55057b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 55157b952d6SSatish Balay comm,recv_waits+i); 55257b952d6SSatish Balay } 55357b952d6SSatish Balay 55457b952d6SSatish Balay /* do sends: 55557b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 55657b952d6SSatish Balay the ith processor 55757b952d6SSatish Balay */ 55857b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 55957b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 56057b952d6SSatish Balay CHKPTRQ(send_waits); 56157b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 56257b952d6SSatish Balay starts[0] = 0; 56357b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 56457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 56557b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 56657b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 56757b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 56857b952d6SSatish Balay } 56957b952d6SSatish Balay PetscFree(owner); 57057b952d6SSatish Balay starts[0] = 0; 57157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 57257b952d6SSatish Balay count = 0; 57357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 57457b952d6SSatish Balay if (procs[i]) { 57557b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 57657b952d6SSatish Balay comm,send_waits+count++); 57757b952d6SSatish Balay } 57857b952d6SSatish Balay } 57957b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 58057b952d6SSatish Balay 58157b952d6SSatish Balay /* Free cache space */ 582d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 58357b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 58457b952d6SSatish Balay 58557b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 58657b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 58757b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 58857b952d6SSatish Balay baij->rmax = nmax; 58957b952d6SSatish Balay 59057b952d6SSatish Balay return 0; 59157b952d6SSatish Balay } 592*596b8d2eSBarry Smith #include <math.h> 593*596b8d2eSBarry Smith #define HASH_KEY 0.6180339887 594*596b8d2eSBarry Smith #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))+1) 59557b952d6SSatish Balay 596*596b8d2eSBarry Smith int CreateHashTable(Mat mat) 597*596b8d2eSBarry Smith { 598*596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 599*596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 600*596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 601*596b8d2eSBarry Smith int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 602*596b8d2eSBarry Smith int size=1.5*nz,ct=0,max=0; 603*596b8d2eSBarry Smith Scalar *aa=a->a,*ba=b->a; 604*596b8d2eSBarry Smith double key; 605*596b8d2eSBarry Smith static double *HT; 606*596b8d2eSBarry Smith static int flag=1; 607*596b8d2eSBarry Smith 608*596b8d2eSBarry Smith 609*596b8d2eSBarry Smith /* Allocate Memory for Hash Table */ 610*596b8d2eSBarry Smith if (flag) { 611*596b8d2eSBarry Smith HT = (double*)PetscMalloc(size*sizeof(double)); 612*596b8d2eSBarry Smith flag = 0; 613*596b8d2eSBarry Smith } 614*596b8d2eSBarry Smith PetscMemzero(HT,size*sizeof(double)); 615*596b8d2eSBarry Smith 616*596b8d2eSBarry Smith /* Loop Over A */ 617*596b8d2eSBarry Smith for ( i=0; i<a->n; i++ ) { 618*596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 619*596b8d2eSBarry Smith key = i*baij->n+aj[j]+1; 620*596b8d2eSBarry Smith h1 = HASH1(size,key); 621*596b8d2eSBarry Smith 622*596b8d2eSBarry Smith for ( k=1; k<size; k++ ){ 623*596b8d2eSBarry Smith if (HT[(h1*k)%size] == 0.0) { 624*596b8d2eSBarry Smith HT[(h1*k)%size] = key; 625*596b8d2eSBarry Smith break; 626*596b8d2eSBarry Smith } else ct++; 627*596b8d2eSBarry Smith } 628*596b8d2eSBarry Smith if (k> max) max =k; 629*596b8d2eSBarry Smith } 630*596b8d2eSBarry Smith } 631*596b8d2eSBarry Smith printf("***max1 = %d\n",max); 632*596b8d2eSBarry Smith /* Loop Over B */ 633*596b8d2eSBarry Smith for ( i=0; i<b->n; i++ ) { 634*596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 635*596b8d2eSBarry Smith key = i*b->n+bj[j]+1; 636*596b8d2eSBarry Smith h1 = HASH1(size,key); 637*596b8d2eSBarry Smith for ( k=1; k<size; k++ ){ 638*596b8d2eSBarry Smith if (HT[(h1*k)%size] == 0.0) { 639*596b8d2eSBarry Smith HT[(h1*k)%size] = key; 640*596b8d2eSBarry Smith break; 641*596b8d2eSBarry Smith } else ct++; 642*596b8d2eSBarry Smith } 643*596b8d2eSBarry Smith if (k> max) max =k; 644*596b8d2eSBarry Smith } 645*596b8d2eSBarry Smith } 646*596b8d2eSBarry Smith 647*596b8d2eSBarry Smith printf("***max2 = %d\n",max); 648*596b8d2eSBarry Smith /* Print Summary */ 649*596b8d2eSBarry Smith for ( i=0,key=0.0,j=0; i<size; i++) 650*596b8d2eSBarry Smith if (HT[i]) {j++;} 651*596b8d2eSBarry Smith 652*596b8d2eSBarry Smith printf("Size %d Average Buckets %d no of Keys %d\n",size,ct,j); 653*596b8d2eSBarry Smith return 0; 654*596b8d2eSBarry Smith } 65557b952d6SSatish Balay 6565615d1e5SSatish Balay #undef __FUNC__ 6575615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 658ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 65957b952d6SSatish Balay { 66057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 66157b952d6SSatish Balay MPI_Status *send_status,recv_status; 66257b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 663*596b8d2eSBarry Smith int bs=baij->bs,row,col,other_disassembled,flg; 66457b952d6SSatish Balay Scalar *values,val; 665e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 66657b952d6SSatish Balay 66757b952d6SSatish Balay /* wait on receives */ 66857b952d6SSatish Balay while (count) { 66957b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 67057b952d6SSatish Balay /* unpack receives into our local space */ 67157b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 67257b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 67357b952d6SSatish Balay n = n/3; 67457b952d6SSatish Balay for ( i=0; i<n; i++ ) { 67557b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 67657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 67757b952d6SSatish Balay val = values[3*i+2]; 67857b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 67957b952d6SSatish Balay col -= baij->cstart*bs; 6806fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 68157b952d6SSatish Balay } 68257b952d6SSatish Balay else { 68357b952d6SSatish Balay if (mat->was_assembled) { 684905e6a2fSBarry Smith if (!baij->colmap) { 685905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 686905e6a2fSBarry Smith } 687a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 68857b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 68957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 69057b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 69157b952d6SSatish Balay } 69257b952d6SSatish Balay } 6936fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 69457b952d6SSatish Balay } 69557b952d6SSatish Balay } 69657b952d6SSatish Balay count--; 69757b952d6SSatish Balay } 69857b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 69957b952d6SSatish Balay 70057b952d6SSatish Balay /* wait on sends */ 70157b952d6SSatish Balay if (baij->nsends) { 70257b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 70357b952d6SSatish Balay CHKPTRQ(send_status); 70457b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 70557b952d6SSatish Balay PetscFree(send_status); 70657b952d6SSatish Balay } 70757b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 70857b952d6SSatish Balay 70957b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 71057b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 71157b952d6SSatish Balay 71257b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 71357b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 71457b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 71557b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 71657b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 71757b952d6SSatish Balay } 71857b952d6SSatish Balay 7196d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 72057b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 72157b952d6SSatish Balay } 72257b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 72357b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 72457b952d6SSatish Balay 725*596b8d2eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 726*596b8d2eSBarry Smith if (flg) CreateHashTable(mat); 72757b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 72857b952d6SSatish Balay return 0; 72957b952d6SSatish Balay } 73057b952d6SSatish Balay 7315615d1e5SSatish Balay #undef __FUNC__ 7325615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 73357b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 73457b952d6SSatish Balay { 73557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 73657b952d6SSatish Balay int ierr; 73757b952d6SSatish Balay 73857b952d6SSatish Balay if (baij->size == 1) { 73957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 74057b952d6SSatish Balay } 741e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 74257b952d6SSatish Balay return 0; 74357b952d6SSatish Balay } 74457b952d6SSatish Balay 7455615d1e5SSatish Balay #undef __FUNC__ 7465615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 74757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 74857b952d6SSatish Balay { 74957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 750cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 75157b952d6SSatish Balay FILE *fd; 75257b952d6SSatish Balay ViewerType vtype; 75357b952d6SSatish Balay 75457b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 75557b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 75657b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 757639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7584e220ebcSLois Curfman McInnes MatInfo info; 75957b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 76057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7614e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 76257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 76357b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 7644e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 7654e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 7664e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 7674e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 7684e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 7694e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 77057b952d6SSatish Balay fflush(fd); 77157b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 77257b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 77357b952d6SSatish Balay return 0; 77457b952d6SSatish Balay } 775639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 776bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 77757b952d6SSatish Balay return 0; 77857b952d6SSatish Balay } 77957b952d6SSatish Balay } 78057b952d6SSatish Balay 78157b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 78257b952d6SSatish Balay Draw draw; 78357b952d6SSatish Balay PetscTruth isnull; 78457b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 78557b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 78657b952d6SSatish Balay } 78757b952d6SSatish Balay 78857b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 78957b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 79057b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 79157b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 79257b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 79357b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 79457b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 79557b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 79657b952d6SSatish Balay fflush(fd); 79757b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 79857b952d6SSatish Balay } 79957b952d6SSatish Balay else { 80057b952d6SSatish Balay int size = baij->size; 80157b952d6SSatish Balay rank = baij->rank; 80257b952d6SSatish Balay if (size == 1) { 80357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 80457b952d6SSatish Balay } 80557b952d6SSatish Balay else { 80657b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 80757b952d6SSatish Balay Mat A; 80857b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 80957b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 81057b952d6SSatish Balay int mbs=baij->mbs; 81157b952d6SSatish Balay Scalar *a; 81257b952d6SSatish Balay 81357b952d6SSatish Balay if (!rank) { 814cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 81557b952d6SSatish Balay CHKERRQ(ierr); 81657b952d6SSatish Balay } 81757b952d6SSatish Balay else { 818cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 81957b952d6SSatish Balay CHKERRQ(ierr); 82057b952d6SSatish Balay } 82157b952d6SSatish Balay PLogObjectParent(mat,A); 82257b952d6SSatish Balay 82357b952d6SSatish Balay /* copy over the A part */ 82457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 82557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 82657b952d6SSatish Balay row = baij->rstart; 82757b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 82857b952d6SSatish Balay 82957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 83057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 83157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 83257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 83357b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 83457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 835cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 836cee3aa6bSSatish Balay col++; a += bs; 83757b952d6SSatish Balay } 83857b952d6SSatish Balay } 83957b952d6SSatish Balay } 84057b952d6SSatish Balay /* copy over the B part */ 84157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 84257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 84357b952d6SSatish Balay row = baij->rstart*bs; 84457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 84557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 84657b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 84757b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 84857b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 84957b952d6SSatish Balay for (k=0; k<bs; k++ ) { 850cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 851cee3aa6bSSatish Balay col++; a += bs; 85257b952d6SSatish Balay } 85357b952d6SSatish Balay } 85457b952d6SSatish Balay } 85557b952d6SSatish Balay PetscFree(rvals); 8566d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8576d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 85857b952d6SSatish Balay if (!rank) { 85957b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 86057b952d6SSatish Balay } 86157b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 86257b952d6SSatish Balay } 86357b952d6SSatish Balay } 86457b952d6SSatish Balay return 0; 86557b952d6SSatish Balay } 86657b952d6SSatish Balay 86757b952d6SSatish Balay 86857b952d6SSatish Balay 8695615d1e5SSatish Balay #undef __FUNC__ 8705615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 871ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 87257b952d6SSatish Balay { 87357b952d6SSatish Balay Mat mat = (Mat) obj; 87457b952d6SSatish Balay int ierr; 87557b952d6SSatish Balay ViewerType vtype; 87657b952d6SSatish Balay 87757b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 87857b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 87957b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 88057b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 88157b952d6SSatish Balay } 88257b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 88357b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 88457b952d6SSatish Balay } 88557b952d6SSatish Balay return 0; 88657b952d6SSatish Balay } 88757b952d6SSatish Balay 8885615d1e5SSatish Balay #undef __FUNC__ 8895615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 890ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 89179bdfe76SSatish Balay { 89279bdfe76SSatish Balay Mat mat = (Mat) obj; 89379bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 89479bdfe76SSatish Balay int ierr; 89579bdfe76SSatish Balay 89679bdfe76SSatish Balay #if defined(PETSC_LOG) 89779bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 89879bdfe76SSatish Balay #endif 89979bdfe76SSatish Balay 90083e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 90179bdfe76SSatish Balay PetscFree(baij->rowners); 90279bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 90379bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 90479bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 90579bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 90679bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 90779bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 90879bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 90930793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 91079bdfe76SSatish Balay PetscFree(baij); 91190f02eecSBarry Smith if (mat->mapping) { 91290f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 91390f02eecSBarry Smith } 91479bdfe76SSatish Balay PLogObjectDestroy(mat); 91579bdfe76SSatish Balay PetscHeaderDestroy(mat); 91679bdfe76SSatish Balay return 0; 91779bdfe76SSatish Balay } 91879bdfe76SSatish Balay 9195615d1e5SSatish Balay #undef __FUNC__ 9205615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 921ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 922cee3aa6bSSatish Balay { 923cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 92447b4a8eaSLois Curfman McInnes int ierr, nt; 925cee3aa6bSSatish Balay 926c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 92747b4a8eaSLois Curfman McInnes if (nt != a->n) { 928ab26458aSBarry Smith SETERRQ(1,0,"Incompatible partition of A and xx"); 92947b4a8eaSLois Curfman McInnes } 930c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 93147b4a8eaSLois Curfman McInnes if (nt != a->m) { 932e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 93347b4a8eaSLois Curfman McInnes } 93443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 935cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 93643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 937cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 93843a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 939cee3aa6bSSatish Balay return 0; 940cee3aa6bSSatish Balay } 941cee3aa6bSSatish Balay 9425615d1e5SSatish Balay #undef __FUNC__ 9435615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 944ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 945cee3aa6bSSatish Balay { 946cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 947cee3aa6bSSatish Balay int ierr; 94843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 949cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 95043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 951cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 952cee3aa6bSSatish Balay return 0; 953cee3aa6bSSatish Balay } 954cee3aa6bSSatish Balay 9555615d1e5SSatish Balay #undef __FUNC__ 9565615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 957ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 958cee3aa6bSSatish Balay { 959cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 960cee3aa6bSSatish Balay int ierr; 961cee3aa6bSSatish Balay 962cee3aa6bSSatish Balay /* do nondiagonal part */ 963cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 964cee3aa6bSSatish Balay /* send it on its way */ 965537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 966cee3aa6bSSatish Balay /* do local part */ 967cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 968cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 969cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 970cee3aa6bSSatish Balay /* but is not perhaps always true. */ 971639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 972cee3aa6bSSatish Balay return 0; 973cee3aa6bSSatish Balay } 974cee3aa6bSSatish Balay 9755615d1e5SSatish Balay #undef __FUNC__ 9765615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 977ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 978cee3aa6bSSatish Balay { 979cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 980cee3aa6bSSatish Balay int ierr; 981cee3aa6bSSatish Balay 982cee3aa6bSSatish Balay /* do nondiagonal part */ 983cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 984cee3aa6bSSatish Balay /* send it on its way */ 985537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 986cee3aa6bSSatish Balay /* do local part */ 987cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 988cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 989cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 990cee3aa6bSSatish Balay /* but is not perhaps always true. */ 991537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 992cee3aa6bSSatish Balay return 0; 993cee3aa6bSSatish Balay } 994cee3aa6bSSatish Balay 995cee3aa6bSSatish Balay /* 996cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 997cee3aa6bSSatish Balay diagonal block 998cee3aa6bSSatish Balay */ 9995615d1e5SSatish Balay #undef __FUNC__ 10005615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1001ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1002cee3aa6bSSatish Balay { 1003cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1004cee3aa6bSSatish Balay if (a->M != a->N) 1005e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 1006cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 1007cee3aa6bSSatish Balay } 1008cee3aa6bSSatish Balay 10095615d1e5SSatish Balay #undef __FUNC__ 10105615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1011ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1012cee3aa6bSSatish Balay { 1013cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1014cee3aa6bSSatish Balay int ierr; 1015cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1016cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1017cee3aa6bSSatish Balay return 0; 1018cee3aa6bSSatish Balay } 1019026e39d0SSatish Balay 10205615d1e5SSatish Balay #undef __FUNC__ 10215615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1022ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 102357b952d6SSatish Balay { 102457b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 102557b952d6SSatish Balay *m = mat->M; *n = mat->N; 102657b952d6SSatish Balay return 0; 102757b952d6SSatish Balay } 102857b952d6SSatish Balay 10295615d1e5SSatish Balay #undef __FUNC__ 10305615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1031ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 103257b952d6SSatish Balay { 103357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 103457b952d6SSatish Balay *m = mat->m; *n = mat->N; 103557b952d6SSatish Balay return 0; 103657b952d6SSatish Balay } 103757b952d6SSatish Balay 10385615d1e5SSatish Balay #undef __FUNC__ 10395615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1040ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 104157b952d6SSatish Balay { 104257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 104357b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 104457b952d6SSatish Balay return 0; 104557b952d6SSatish Balay } 104657b952d6SSatish Balay 1047acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1048acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1049acdf5bf4SSatish Balay 10505615d1e5SSatish Balay #undef __FUNC__ 10515615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1052acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1053acdf5bf4SSatish Balay { 1054acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1055acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1056acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1057d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1058d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1059acdf5bf4SSatish Balay 1060e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1061acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1062acdf5bf4SSatish Balay 1063acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1064acdf5bf4SSatish Balay /* 1065acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1066acdf5bf4SSatish Balay */ 1067acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1068bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1069bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1070acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1071acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1072acdf5bf4SSatish Balay } 1073acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1074acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1075acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1076acdf5bf4SSatish Balay } 1077acdf5bf4SSatish Balay 1078acdf5bf4SSatish Balay 1079e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 1080d9d09a02SSatish Balay lrow = row - brstart; 1081acdf5bf4SSatish Balay 1082acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1083acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1084acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1085acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1086acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1087acdf5bf4SSatish Balay nztot = nzA + nzB; 1088acdf5bf4SSatish Balay 1089acdf5bf4SSatish Balay cmap = mat->garray; 1090acdf5bf4SSatish Balay if (v || idx) { 1091acdf5bf4SSatish Balay if (nztot) { 1092acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1093acdf5bf4SSatish Balay int imark = -1; 1094acdf5bf4SSatish Balay if (v) { 1095acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1096acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1097d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1098acdf5bf4SSatish Balay else break; 1099acdf5bf4SSatish Balay } 1100acdf5bf4SSatish Balay imark = i; 1101acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1102acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1103acdf5bf4SSatish Balay } 1104acdf5bf4SSatish Balay if (idx) { 1105acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1106acdf5bf4SSatish Balay if (imark > -1) { 1107acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1108bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1109acdf5bf4SSatish Balay } 1110acdf5bf4SSatish Balay } else { 1111acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1112d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1113d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1114acdf5bf4SSatish Balay else break; 1115acdf5bf4SSatish Balay } 1116acdf5bf4SSatish Balay imark = i; 1117acdf5bf4SSatish Balay } 1118d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1119d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1120acdf5bf4SSatish Balay } 1121acdf5bf4SSatish Balay } 1122d212a18eSSatish Balay else { 1123d212a18eSSatish Balay if (idx) *idx = 0; 1124d212a18eSSatish Balay if (v) *v = 0; 1125d212a18eSSatish Balay } 1126acdf5bf4SSatish Balay } 1127acdf5bf4SSatish Balay *nz = nztot; 1128acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1129acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1130acdf5bf4SSatish Balay return 0; 1131acdf5bf4SSatish Balay } 1132acdf5bf4SSatish Balay 11335615d1e5SSatish Balay #undef __FUNC__ 11345615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1135acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1136acdf5bf4SSatish Balay { 1137acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1138acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1139e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 1140acdf5bf4SSatish Balay } 1141acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 1142acdf5bf4SSatish Balay return 0; 1143acdf5bf4SSatish Balay } 1144acdf5bf4SSatish Balay 11455615d1e5SSatish Balay #undef __FUNC__ 11465615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1147ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 11485a838052SSatish Balay { 11495a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 11505a838052SSatish Balay *bs = baij->bs; 11515a838052SSatish Balay return 0; 11525a838052SSatish Balay } 11535a838052SSatish Balay 11545615d1e5SSatish Balay #undef __FUNC__ 11555615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1156ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 115758667388SSatish Balay { 115858667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 115958667388SSatish Balay int ierr; 116058667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 116158667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 116258667388SSatish Balay return 0; 116358667388SSatish Balay } 11640ac07820SSatish Balay 11655615d1e5SSatish Balay #undef __FUNC__ 11665615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1167ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 11680ac07820SSatish Balay { 11694e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 11704e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 11717d57db60SLois Curfman McInnes int ierr; 11727d57db60SLois Curfman McInnes double isend[5], irecv[5]; 11730ac07820SSatish Balay 11744e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 11754e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 11764e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 11774e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 11784e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 11794e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 11804e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 11814e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 11824e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 11830ac07820SSatish Balay if (flag == MAT_LOCAL) { 11844e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 11854e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 11864e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11874e220ebcSLois Curfman McInnes info->memory = isend[3]; 11884e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 11890ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1190dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 11914e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11924e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11934e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11944e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11954e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11960ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1197dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 11984e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11994e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 12004e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 12014e220ebcSLois Curfman McInnes info->memory = irecv[3]; 12024e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 12030ac07820SSatish Balay } 12044e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 12054e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 12064e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 12070ac07820SSatish Balay return 0; 12080ac07820SSatish Balay } 12090ac07820SSatish Balay 12105615d1e5SSatish Balay #undef __FUNC__ 12115615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1212ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 121358667388SSatish Balay { 121458667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 121558667388SSatish Balay 121658667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 121758667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 12186da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1219c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 122096854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1221c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1222b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1223b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1224b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1225aeafbbfcSLois Curfman McInnes a->roworiented = 1; 122658667388SSatish Balay MatSetOption(a->A,op); 122758667388SSatish Balay MatSetOption(a->B,op); 1228b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 12296da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 123058667388SSatish Balay op == MAT_SYMMETRIC || 123158667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 123258667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 123358667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 123458667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 123558667388SSatish Balay a->roworiented = 0; 123658667388SSatish Balay MatSetOption(a->A,op); 123758667388SSatish Balay MatSetOption(a->B,op); 12382b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 123990f02eecSBarry Smith a->donotstash = 1; 124090f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1241e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 124258667388SSatish Balay else 1243e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 124458667388SSatish Balay return 0; 124558667388SSatish Balay } 124658667388SSatish Balay 12475615d1e5SSatish Balay #undef __FUNC__ 12485615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1249ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 12500ac07820SSatish Balay { 12510ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 12520ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 12530ac07820SSatish Balay Mat B; 12540ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 12550ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 12560ac07820SSatish Balay Scalar *a; 12570ac07820SSatish Balay 12580ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 1259e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 12600ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 12610ac07820SSatish Balay CHKERRQ(ierr); 12620ac07820SSatish Balay 12630ac07820SSatish Balay /* copy over the A part */ 12640ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 12650ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12660ac07820SSatish Balay row = baij->rstart; 12670ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 12680ac07820SSatish Balay 12690ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12700ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12710ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12720ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12730ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 12740ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12750ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12760ac07820SSatish Balay col++; a += bs; 12770ac07820SSatish Balay } 12780ac07820SSatish Balay } 12790ac07820SSatish Balay } 12800ac07820SSatish Balay /* copy over the B part */ 12810ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 12820ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12830ac07820SSatish Balay row = baij->rstart*bs; 12840ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12850ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12860ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12870ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12880ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 12890ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12900ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12910ac07820SSatish Balay col++; a += bs; 12920ac07820SSatish Balay } 12930ac07820SSatish Balay } 12940ac07820SSatish Balay } 12950ac07820SSatish Balay PetscFree(rvals); 12960ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12970ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12980ac07820SSatish Balay 12990ac07820SSatish Balay if (matout != PETSC_NULL) { 13000ac07820SSatish Balay *matout = B; 13010ac07820SSatish Balay } else { 13020ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 13030ac07820SSatish Balay PetscFree(baij->rowners); 13040ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 13050ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 13060ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 13070ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 13080ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 13090ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 13100ac07820SSatish Balay PetscFree(baij); 1311f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 13120ac07820SSatish Balay PetscHeaderDestroy(B); 13130ac07820SSatish Balay } 13140ac07820SSatish Balay return 0; 13150ac07820SSatish Balay } 13160e95ebc0SSatish Balay 13175615d1e5SSatish Balay #undef __FUNC__ 13185615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 13190e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 13200e95ebc0SSatish Balay { 13210e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 13220e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 13230e95ebc0SSatish Balay int ierr,s1,s2,s3; 13240e95ebc0SSatish Balay 13250e95ebc0SSatish Balay if (ll) { 13260e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 13270e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1328e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 13290e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 13300e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 13310e95ebc0SSatish Balay } 1332e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 13330e95ebc0SSatish Balay return 0; 13340e95ebc0SSatish Balay } 13350e95ebc0SSatish Balay 13360ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 13370ac07820SSatish Balay matrix is square and the column and row owerships are identical. 13380ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 13390ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 13400ac07820SSatish Balay routine. 13410ac07820SSatish Balay */ 13425615d1e5SSatish Balay #undef __FUNC__ 13435615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1344ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 13450ac07820SSatish Balay { 13460ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 13470ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 13480ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 13490ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 13500ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 13510ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 13520ac07820SSatish Balay MPI_Comm comm = A->comm; 13530ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 13540ac07820SSatish Balay MPI_Status recv_status,*send_status; 13550ac07820SSatish Balay IS istmp; 13560ac07820SSatish Balay 13570ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 13580ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 13590ac07820SSatish Balay 13600ac07820SSatish Balay /* first count number of contributors to each processor */ 13610ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 13620ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 13630ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 13640ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13650ac07820SSatish Balay idx = rows[i]; 13660ac07820SSatish Balay found = 0; 13670ac07820SSatish Balay for ( j=0; j<size; j++ ) { 13680ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 13690ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 13700ac07820SSatish Balay } 13710ac07820SSatish Balay } 1372e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 13730ac07820SSatish Balay } 13740ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 13750ac07820SSatish Balay 13760ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 13770ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 13780ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 13790ac07820SSatish Balay nrecvs = work[rank]; 13800ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 13810ac07820SSatish Balay nmax = work[rank]; 13820ac07820SSatish Balay PetscFree(work); 13830ac07820SSatish Balay 13840ac07820SSatish Balay /* post receives: */ 13850ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 13860ac07820SSatish Balay CHKPTRQ(rvalues); 13870ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 13880ac07820SSatish Balay CHKPTRQ(recv_waits); 13890ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13900ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 13910ac07820SSatish Balay } 13920ac07820SSatish Balay 13930ac07820SSatish Balay /* do sends: 13940ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 13950ac07820SSatish Balay the ith processor 13960ac07820SSatish Balay */ 13970ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 13980ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 13990ac07820SSatish Balay CHKPTRQ(send_waits); 14000ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 14010ac07820SSatish Balay starts[0] = 0; 14020ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 14030ac07820SSatish Balay for ( i=0; i<N; i++ ) { 14040ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 14050ac07820SSatish Balay } 14060ac07820SSatish Balay ISRestoreIndices(is,&rows); 14070ac07820SSatish Balay 14080ac07820SSatish Balay starts[0] = 0; 14090ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 14100ac07820SSatish Balay count = 0; 14110ac07820SSatish Balay for ( i=0; i<size; i++ ) { 14120ac07820SSatish Balay if (procs[i]) { 14130ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 14140ac07820SSatish Balay } 14150ac07820SSatish Balay } 14160ac07820SSatish Balay PetscFree(starts); 14170ac07820SSatish Balay 14180ac07820SSatish Balay base = owners[rank]*bs; 14190ac07820SSatish Balay 14200ac07820SSatish Balay /* wait on receives */ 14210ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 14220ac07820SSatish Balay source = lens + nrecvs; 14230ac07820SSatish Balay count = nrecvs; slen = 0; 14240ac07820SSatish Balay while (count) { 14250ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 14260ac07820SSatish Balay /* unpack receives into our local space */ 14270ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 14280ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 14290ac07820SSatish Balay lens[imdex] = n; 14300ac07820SSatish Balay slen += n; 14310ac07820SSatish Balay count--; 14320ac07820SSatish Balay } 14330ac07820SSatish Balay PetscFree(recv_waits); 14340ac07820SSatish Balay 14350ac07820SSatish Balay /* move the data into the send scatter */ 14360ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 14370ac07820SSatish Balay count = 0; 14380ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 14390ac07820SSatish Balay values = rvalues + i*nmax; 14400ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 14410ac07820SSatish Balay lrows[count++] = values[j] - base; 14420ac07820SSatish Balay } 14430ac07820SSatish Balay } 14440ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 14450ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 14460ac07820SSatish Balay 14470ac07820SSatish Balay /* actually zap the local rows */ 1448029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 14490ac07820SSatish Balay PLogObjectParent(A,istmp); 14500ac07820SSatish Balay PetscFree(lrows); 14510ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 14520ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 14530ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 14540ac07820SSatish Balay 14550ac07820SSatish Balay /* wait on sends */ 14560ac07820SSatish Balay if (nsends) { 14570ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 14580ac07820SSatish Balay CHKPTRQ(send_status); 14590ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 14600ac07820SSatish Balay PetscFree(send_status); 14610ac07820SSatish Balay } 14620ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 14630ac07820SSatish Balay 14640ac07820SSatish Balay return 0; 14650ac07820SSatish Balay } 1466ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 14675615d1e5SSatish Balay #undef __FUNC__ 14685615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1469ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1470ba4ca20aSSatish Balay { 1471ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1472ba4ca20aSSatish Balay 1473ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1474ba4ca20aSSatish Balay else return 0; 1475ba4ca20aSSatish Balay } 14760ac07820SSatish Balay 14775615d1e5SSatish Balay #undef __FUNC__ 14785615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1479ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1480bb5a7306SBarry Smith { 1481bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1482bb5a7306SBarry Smith int ierr; 1483bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1484bb5a7306SBarry Smith return 0; 1485bb5a7306SBarry Smith } 1486bb5a7306SBarry Smith 14870ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 14880ac07820SSatish Balay 148979bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 149079bdfe76SSatish Balay static struct _MatOps MatOps = { 1491bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 14924c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 14934c50302cSBarry Smith 0,0,0,0, 14940ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 14950e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 149658667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 14974c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 14984c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 14994c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 150094a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1501d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1502ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1503bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1504ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 150579bdfe76SSatish Balay 150679bdfe76SSatish Balay 15075615d1e5SSatish Balay #undef __FUNC__ 15085615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 150979bdfe76SSatish Balay /*@C 151079bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 151179bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 151279bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 151379bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 151479bdfe76SSatish Balay performance can be increased by more than a factor of 50. 151579bdfe76SSatish Balay 151679bdfe76SSatish Balay Input Parameters: 151779bdfe76SSatish Balay . comm - MPI communicator 151879bdfe76SSatish Balay . bs - size of blockk 151979bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 152092e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 152192e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 152292e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 152392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 152492e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 152579bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 152692e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 152779bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 152879bdfe76SSatish Balay submatrix (same for all local rows) 152992e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 153092e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 153192e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 153292e8d321SLois Curfman McInnes it is zero. 153392e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 153479bdfe76SSatish Balay submatrix (same for all local rows). 153592e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 153692e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 153792e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 153879bdfe76SSatish Balay 153979bdfe76SSatish Balay Output Parameter: 154079bdfe76SSatish Balay . A - the matrix 154179bdfe76SSatish Balay 154279bdfe76SSatish Balay Notes: 154379bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 154479bdfe76SSatish Balay (possibly both). 154579bdfe76SSatish Balay 154679bdfe76SSatish Balay Storage Information: 154779bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 154879bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 154979bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 155079bdfe76SSatish Balay local matrix (a rectangular submatrix). 155179bdfe76SSatish Balay 155279bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 155379bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 155479bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 155579bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 155679bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 155779bdfe76SSatish Balay 155879bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 155979bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 156079bdfe76SSatish Balay 156179bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 156279bdfe76SSatish Balay $ ------------------- 156379bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 156479bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 156579bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 156679bdfe76SSatish Balay $ ------------------- 156779bdfe76SSatish Balay $ 156879bdfe76SSatish Balay 156979bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 157079bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 157179bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 157257b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 157379bdfe76SSatish Balay 157479bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 157579bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 157679bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 157792e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 157892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15796da5968aSLois Curfman McInnes matrices. 158079bdfe76SSatish Balay 158192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 158279bdfe76SSatish Balay 158379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 158479bdfe76SSatish Balay @*/ 158579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 158679bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 158779bdfe76SSatish Balay { 158879bdfe76SSatish Balay Mat B; 158979bdfe76SSatish Balay Mat_MPIBAIJ *b; 15904c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 159179bdfe76SSatish Balay 1592e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 159379bdfe76SSatish Balay *A = 0; 1594f09e8eb9SSatish Balay PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 159579bdfe76SSatish Balay PLogObjectCreate(B); 159679bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 159779bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 159879bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 15994c50302cSBarry Smith MPI_Comm_size(comm,&size); 16004c50302cSBarry Smith if (size == 1) { 16014c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 16024c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 16034c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 16044c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 16054c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 16064c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 16074c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 16084c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 16094c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 16104c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 16114c50302cSBarry Smith } 16124c50302cSBarry Smith 161379bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 161479bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 161590f02eecSBarry Smith B->mapping = 0; 161679bdfe76SSatish Balay B->factor = 0; 161779bdfe76SSatish Balay B->assembled = PETSC_FALSE; 161879bdfe76SSatish Balay 1619e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 162079bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 162179bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 162279bdfe76SSatish Balay 162379bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1624e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1625e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1626e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1627cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1628cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 162979bdfe76SSatish Balay 163079bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 163179bdfe76SSatish Balay work[0] = m; work[1] = n; 163279bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 163379bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 163479bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 163579bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 163679bdfe76SSatish Balay } 163779bdfe76SSatish Balay if (m == PETSC_DECIDE) { 163879bdfe76SSatish Balay Mbs = M/bs; 1639e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 164079bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 164179bdfe76SSatish Balay m = mbs*bs; 164279bdfe76SSatish Balay } 164379bdfe76SSatish Balay if (n == PETSC_DECIDE) { 164479bdfe76SSatish Balay Nbs = N/bs; 1645e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 164679bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 164779bdfe76SSatish Balay n = nbs*bs; 164879bdfe76SSatish Balay } 1649e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 165079bdfe76SSatish Balay 165179bdfe76SSatish Balay b->m = m; B->m = m; 165279bdfe76SSatish Balay b->n = n; B->n = n; 165379bdfe76SSatish Balay b->N = N; B->N = N; 165479bdfe76SSatish Balay b->M = M; B->M = M; 165579bdfe76SSatish Balay b->bs = bs; 165679bdfe76SSatish Balay b->bs2 = bs*bs; 165779bdfe76SSatish Balay b->mbs = mbs; 165879bdfe76SSatish Balay b->nbs = nbs; 165979bdfe76SSatish Balay b->Mbs = Mbs; 166079bdfe76SSatish Balay b->Nbs = Nbs; 166179bdfe76SSatish Balay 166279bdfe76SSatish Balay /* build local table of row and column ownerships */ 166379bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1664f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 16650ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 166679bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 166779bdfe76SSatish Balay b->rowners[0] = 0; 166879bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 166979bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 167079bdfe76SSatish Balay } 167179bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 167279bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 16734fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 16744fa0d573SSatish Balay b->rend_bs = b->rend * bs; 16754fa0d573SSatish Balay 167679bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 167779bdfe76SSatish Balay b->cowners[0] = 0; 167879bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 167979bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 168079bdfe76SSatish Balay } 168179bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 168279bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 16834fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 16844fa0d573SSatish Balay b->cend_bs = b->cend * bs; 168579bdfe76SSatish Balay 168679bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1687029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 168879bdfe76SSatish Balay PLogObjectParent(B,b->A); 168979bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1690029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 169179bdfe76SSatish Balay PLogObjectParent(B,b->B); 169279bdfe76SSatish Balay 169379bdfe76SSatish Balay /* build cache for off array entries formed */ 169479bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 169590f02eecSBarry Smith b->donotstash = 0; 169679bdfe76SSatish Balay b->colmap = 0; 169779bdfe76SSatish Balay b->garray = 0; 169879bdfe76SSatish Balay b->roworiented = 1; 169979bdfe76SSatish Balay 170030793edcSSatish Balay /* stuff used in block assembly */ 170130793edcSSatish Balay b->barray = 0; 170230793edcSSatish Balay 170379bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 170479bdfe76SSatish Balay b->lvec = 0; 170579bdfe76SSatish Balay b->Mvctx = 0; 170679bdfe76SSatish Balay 170779bdfe76SSatish Balay /* stuff for MatGetRow() */ 170879bdfe76SSatish Balay b->rowindices = 0; 170979bdfe76SSatish Balay b->rowvalues = 0; 171079bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 171179bdfe76SSatish Balay 171279bdfe76SSatish Balay *A = B; 171379bdfe76SSatish Balay return 0; 171479bdfe76SSatish Balay } 1715026e39d0SSatish Balay 17165615d1e5SSatish Balay #undef __FUNC__ 17175615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 17180ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 17190ac07820SSatish Balay { 17200ac07820SSatish Balay Mat mat; 17210ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 17220ac07820SSatish Balay int ierr, len=0, flg; 17230ac07820SSatish Balay 17240ac07820SSatish Balay *newmat = 0; 1725f09e8eb9SSatish Balay PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 17260ac07820SSatish Balay PLogObjectCreate(mat); 17270ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 17280ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 17290ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 17300ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 17310ac07820SSatish Balay mat->factor = matin->factor; 17320ac07820SSatish Balay mat->assembled = PETSC_TRUE; 17330ac07820SSatish Balay 17340ac07820SSatish Balay a->m = mat->m = oldmat->m; 17350ac07820SSatish Balay a->n = mat->n = oldmat->n; 17360ac07820SSatish Balay a->M = mat->M = oldmat->M; 17370ac07820SSatish Balay a->N = mat->N = oldmat->N; 17380ac07820SSatish Balay 17390ac07820SSatish Balay a->bs = oldmat->bs; 17400ac07820SSatish Balay a->bs2 = oldmat->bs2; 17410ac07820SSatish Balay a->mbs = oldmat->mbs; 17420ac07820SSatish Balay a->nbs = oldmat->nbs; 17430ac07820SSatish Balay a->Mbs = oldmat->Mbs; 17440ac07820SSatish Balay a->Nbs = oldmat->Nbs; 17450ac07820SSatish Balay 17460ac07820SSatish Balay a->rstart = oldmat->rstart; 17470ac07820SSatish Balay a->rend = oldmat->rend; 17480ac07820SSatish Balay a->cstart = oldmat->cstart; 17490ac07820SSatish Balay a->cend = oldmat->cend; 17500ac07820SSatish Balay a->size = oldmat->size; 17510ac07820SSatish Balay a->rank = oldmat->rank; 1752e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 17530ac07820SSatish Balay a->rowvalues = 0; 17540ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 175530793edcSSatish Balay a->barray = 0; 17560ac07820SSatish Balay 17570ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1758f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 17590ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 17600ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 17610ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 17620ac07820SSatish Balay if (oldmat->colmap) { 17630ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 17640ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 17650ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 17660ac07820SSatish Balay } else a->colmap = 0; 17670ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 17680ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 17690ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 17700ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 17710ac07820SSatish Balay } else a->garray = 0; 17720ac07820SSatish Balay 17730ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 17740ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 17750ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 17760ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 17770ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 17780ac07820SSatish Balay PLogObjectParent(mat,a->A); 17790ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 17800ac07820SSatish Balay PLogObjectParent(mat,a->B); 17810ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 17820ac07820SSatish Balay if (flg) { 17830ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 17840ac07820SSatish Balay } 17850ac07820SSatish Balay *newmat = mat; 17860ac07820SSatish Balay return 0; 17870ac07820SSatish Balay } 178857b952d6SSatish Balay 178957b952d6SSatish Balay #include "sys.h" 179057b952d6SSatish Balay 17915615d1e5SSatish Balay #undef __FUNC__ 17925615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 179357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 179457b952d6SSatish Balay { 179557b952d6SSatish Balay Mat A; 179657b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 179757b952d6SSatish Balay Scalar *vals,*buf; 179857b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 179957b952d6SSatish Balay MPI_Status status; 1800cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 180157b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 180257b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 180357b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 180457b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 180557b952d6SSatish Balay 180657b952d6SSatish Balay 180757b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 180857b952d6SSatish Balay bs2 = bs*bs; 180957b952d6SSatish Balay 181057b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 181157b952d6SSatish Balay if (!rank) { 181257b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 181357b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1814e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 181557b952d6SSatish Balay } 181657b952d6SSatish Balay 181757b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 181857b952d6SSatish Balay M = header[1]; N = header[2]; 181957b952d6SSatish Balay 1820e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 182157b952d6SSatish Balay 182257b952d6SSatish Balay /* 182357b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 182457b952d6SSatish Balay divisible by the blocksize 182557b952d6SSatish Balay */ 182657b952d6SSatish Balay Mbs = M/bs; 182757b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 182857b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 182957b952d6SSatish Balay else Mbs++; 183057b952d6SSatish Balay if (extra_rows &&!rank) { 1831b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 183257b952d6SSatish Balay } 1833537820f0SBarry Smith 183457b952d6SSatish Balay /* determine ownership of all rows */ 183557b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 183657b952d6SSatish Balay m = mbs * bs; 1837cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1838cee3aa6bSSatish Balay browners = rowners + size + 1; 183957b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 184057b952d6SSatish Balay rowners[0] = 0; 1841cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1842cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 184357b952d6SSatish Balay rstart = rowners[rank]; 184457b952d6SSatish Balay rend = rowners[rank+1]; 184557b952d6SSatish Balay 184657b952d6SSatish Balay /* distribute row lengths to all processors */ 184757b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 184857b952d6SSatish Balay if (!rank) { 184957b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 185057b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 185157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 185257b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1853cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1854cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 185557b952d6SSatish Balay PetscFree(sndcounts); 185657b952d6SSatish Balay } 185757b952d6SSatish Balay else { 185857b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 185957b952d6SSatish Balay } 186057b952d6SSatish Balay 186157b952d6SSatish Balay if (!rank) { 186257b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 186357b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 186457b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 186557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 186657b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 186757b952d6SSatish Balay procsnz[i] += rowlengths[j]; 186857b952d6SSatish Balay } 186957b952d6SSatish Balay } 187057b952d6SSatish Balay PetscFree(rowlengths); 187157b952d6SSatish Balay 187257b952d6SSatish Balay /* determine max buffer needed and allocate it */ 187357b952d6SSatish Balay maxnz = 0; 187457b952d6SSatish Balay for ( i=0; i<size; i++ ) { 187557b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 187657b952d6SSatish Balay } 187757b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 187857b952d6SSatish Balay 187957b952d6SSatish Balay /* read in my part of the matrix column indices */ 188057b952d6SSatish Balay nz = procsnz[0]; 188157b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 188257b952d6SSatish Balay mycols = ibuf; 1883cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 188457b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1885cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1886cee3aa6bSSatish Balay 188757b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 188857b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 188957b952d6SSatish Balay nz = procsnz[i]; 189057b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 189157b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 189257b952d6SSatish Balay } 189357b952d6SSatish Balay /* read in the stuff for the last proc */ 189457b952d6SSatish Balay if ( size != 1 ) { 189557b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 189657b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 189757b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1898cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 189957b952d6SSatish Balay } 190057b952d6SSatish Balay PetscFree(cols); 190157b952d6SSatish Balay } 190257b952d6SSatish Balay else { 190357b952d6SSatish Balay /* determine buffer space needed for message */ 190457b952d6SSatish Balay nz = 0; 190557b952d6SSatish Balay for ( i=0; i<m; i++ ) { 190657b952d6SSatish Balay nz += locrowlens[i]; 190757b952d6SSatish Balay } 190857b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 190957b952d6SSatish Balay mycols = ibuf; 191057b952d6SSatish Balay /* receive message of column indices*/ 191157b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 191257b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1913e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 191457b952d6SSatish Balay } 191557b952d6SSatish Balay 191657b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1917cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1918cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 191957b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1920cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 192157b952d6SSatish Balay masked1 = mask + Mbs; 192257b952d6SSatish Balay masked2 = masked1 + Mbs; 192357b952d6SSatish Balay rowcount = 0; nzcount = 0; 192457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 192557b952d6SSatish Balay dcount = 0; 192657b952d6SSatish Balay odcount = 0; 192757b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 192857b952d6SSatish Balay kmax = locrowlens[rowcount]; 192957b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 193057b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 193157b952d6SSatish Balay if (!mask[tmp]) { 193257b952d6SSatish Balay mask[tmp] = 1; 193357b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 193457b952d6SSatish Balay else masked1[dcount++] = tmp; 193557b952d6SSatish Balay } 193657b952d6SSatish Balay } 193757b952d6SSatish Balay rowcount++; 193857b952d6SSatish Balay } 1939cee3aa6bSSatish Balay 194057b952d6SSatish Balay dlens[i] = dcount; 194157b952d6SSatish Balay odlens[i] = odcount; 1942cee3aa6bSSatish Balay 194357b952d6SSatish Balay /* zero out the mask elements we set */ 194457b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 194557b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 194657b952d6SSatish Balay } 1947cee3aa6bSSatish Balay 194857b952d6SSatish Balay /* create our matrix */ 1949537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1950537820f0SBarry Smith CHKERRQ(ierr); 195157b952d6SSatish Balay A = *newmat; 19526d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 195357b952d6SSatish Balay 195457b952d6SSatish Balay if (!rank) { 195557b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 195657b952d6SSatish Balay /* read in my part of the matrix numerical values */ 195757b952d6SSatish Balay nz = procsnz[0]; 195857b952d6SSatish Balay vals = buf; 1959cee3aa6bSSatish Balay mycols = ibuf; 1960cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 196157b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1962cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1963537820f0SBarry Smith 196457b952d6SSatish Balay /* insert into matrix */ 196557b952d6SSatish Balay jj = rstart*bs; 196657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 196757b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 196857b952d6SSatish Balay mycols += locrowlens[i]; 196957b952d6SSatish Balay vals += locrowlens[i]; 197057b952d6SSatish Balay jj++; 197157b952d6SSatish Balay } 197257b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 197357b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 197457b952d6SSatish Balay nz = procsnz[i]; 197557b952d6SSatish Balay vals = buf; 197657b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 197757b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 197857b952d6SSatish Balay } 197957b952d6SSatish Balay /* the last proc */ 198057b952d6SSatish Balay if ( size != 1 ){ 198157b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1982cee3aa6bSSatish Balay vals = buf; 198357b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 198457b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1985cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 198657b952d6SSatish Balay } 198757b952d6SSatish Balay PetscFree(procsnz); 198857b952d6SSatish Balay } 198957b952d6SSatish Balay else { 199057b952d6SSatish Balay /* receive numeric values */ 199157b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 199257b952d6SSatish Balay 199357b952d6SSatish Balay /* receive message of values*/ 199457b952d6SSatish Balay vals = buf; 1995cee3aa6bSSatish Balay mycols = ibuf; 199657b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 199757b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1998e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 199957b952d6SSatish Balay 200057b952d6SSatish Balay /* insert into matrix */ 200157b952d6SSatish Balay jj = rstart*bs; 2002cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 200357b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 200457b952d6SSatish Balay mycols += locrowlens[i]; 200557b952d6SSatish Balay vals += locrowlens[i]; 200657b952d6SSatish Balay jj++; 200757b952d6SSatish Balay } 200857b952d6SSatish Balay } 200957b952d6SSatish Balay PetscFree(locrowlens); 201057b952d6SSatish Balay PetscFree(buf); 201157b952d6SSatish Balay PetscFree(ibuf); 201257b952d6SSatish Balay PetscFree(rowners); 201357b952d6SSatish Balay PetscFree(dlens); 2014cee3aa6bSSatish Balay PetscFree(mask); 20156d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 20166d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 201757b952d6SSatish Balay return 0; 201857b952d6SSatish Balay } 201957b952d6SSatish Balay 202057b952d6SSatish Balay 2021