179bdfe76SSatish Balay #ifndef lint 2*2b362799SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.63 1997/04/01 14:34:09 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2257b952d6SSatish Balay 233b2fbd54SBarry Smith 24537820f0SBarry Smith /* 25537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2857b952d6SSatish Balay length of colmap equals the global matrix length. 2957b952d6SSatish Balay */ 305615d1e5SSatish Balay #undef __FUNC__ 315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3357b952d6SSatish Balay { 3457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3557b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 36928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3757b952d6SSatish Balay 3857b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 3957b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4057b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4257b952d6SSatish Balay return 0; 4357b952d6SSatish Balay } 4457b952d6SSatish Balay 455615d1e5SSatish Balay #undef __FUNC__ 465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 4957b952d6SSatish Balay { 503b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5157b952d6SSatish Balay int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 553b2fbd54SBarry Smith return 0; 563b2fbd54SBarry Smith } 573b2fbd54SBarry Smith 585615d1e5SSatish Balay #undef __FUNC__ 595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 613b2fbd54SBarry Smith PetscTruth *done) 623b2fbd54SBarry Smith { 633b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 643b2fbd54SBarry Smith int ierr; 653b2fbd54SBarry Smith if (aij->size == 1) { 663b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 67e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6857b952d6SSatish Balay return 0; 6957b952d6SSatish Balay } 7080c1aa95SSatish Balay #define CHUNKSIZE 10 7180c1aa95SSatish Balay 72f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 7380c1aa95SSatish Balay { \ 7480c1aa95SSatish Balay \ 7580c1aa95SSatish Balay brow = row/bs; \ 7680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 77ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 7880c1aa95SSatish Balay bcol = col/bs; \ 7980c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 80ab26458aSBarry Smith low = 0; high = nrow; \ 81ab26458aSBarry Smith while (high-low > 3) { \ 82ab26458aSBarry Smith t = (low+high)/2; \ 83ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 84ab26458aSBarry Smith else low = t; \ 85ab26458aSBarry Smith } \ 86ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 8780c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 8880c1aa95SSatish Balay if (rp[_i] == bcol) { \ 8980c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 90eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 91eada6651SSatish Balay else *bap = value; \ 92ac7a638eSSatish Balay goto a_noinsert; \ 9380c1aa95SSatish Balay } \ 9480c1aa95SSatish Balay } \ 95ac7a638eSSatish Balay if (a->nonew) goto a_noinsert; \ 9680c1aa95SSatish Balay if (nrow >= rmax) { \ 9780c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 9880c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 9980c1aa95SSatish Balay Scalar *new_a; \ 10080c1aa95SSatish Balay \ 10180c1aa95SSatish Balay /* malloc new storage space */ \ 10280c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 10380c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 10480c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 10580c1aa95SSatish Balay new_i = new_j + new_nz; \ 10680c1aa95SSatish Balay \ 10780c1aa95SSatish Balay /* copy over old data into new slots */ \ 10880c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 10980c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 11080c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 11180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 11280c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 11380c1aa95SSatish Balay len*sizeof(int)); \ 11480c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 11580c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 11680c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 11780c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 11880c1aa95SSatish Balay /* free up old matrix storage */ \ 11980c1aa95SSatish Balay PetscFree(a->a); \ 12080c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 12180c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 12280c1aa95SSatish Balay a->singlemalloc = 1; \ 12380c1aa95SSatish Balay \ 12480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 125ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 12680c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 12780c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 12880c1aa95SSatish Balay a->reallocs++; \ 12980c1aa95SSatish Balay a->nz++; \ 13080c1aa95SSatish Balay } \ 13180c1aa95SSatish Balay N = nrow++ - 1; \ 13280c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 13380c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 13480c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 13580c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 13680c1aa95SSatish Balay } \ 13780c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 13880c1aa95SSatish Balay rp[_i] = bcol; \ 13980c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 140ac7a638eSSatish Balay a_noinsert:; \ 14180c1aa95SSatish Balay ailen[brow] = nrow; \ 14280c1aa95SSatish Balay } 14357b952d6SSatish Balay 144ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 145ac7a638eSSatish Balay { \ 146ac7a638eSSatish Balay \ 147ac7a638eSSatish Balay brow = row/bs; \ 148ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 149ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 150ac7a638eSSatish Balay bcol = col/bs; \ 151ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 152ac7a638eSSatish Balay low = 0; high = nrow; \ 153ac7a638eSSatish Balay while (high-low > 3) { \ 154ac7a638eSSatish Balay t = (low+high)/2; \ 155ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 156ac7a638eSSatish Balay else low = t; \ 157ac7a638eSSatish Balay } \ 158ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 159ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 160ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 161ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 162ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 163ac7a638eSSatish Balay else *bap = value; \ 164ac7a638eSSatish Balay goto b_noinsert; \ 165ac7a638eSSatish Balay } \ 166ac7a638eSSatish Balay } \ 16774c639caSSatish Balay if (b->nonew) goto b_noinsert; \ 168ac7a638eSSatish Balay if (nrow >= rmax) { \ 169ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 17074c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 171ac7a638eSSatish Balay Scalar *new_a; \ 172ac7a638eSSatish Balay \ 173ac7a638eSSatish Balay /* malloc new storage space */ \ 17474c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 175ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 176ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 177ac7a638eSSatish Balay new_i = new_j + new_nz; \ 178ac7a638eSSatish Balay \ 179ac7a638eSSatish Balay /* copy over old data into new slots */ \ 180ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 18174c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 182ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 183ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 184ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 185ac7a638eSSatish Balay len*sizeof(int)); \ 186ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 187ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 188ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 189ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 190ac7a638eSSatish Balay /* free up old matrix storage */ \ 19174c639caSSatish Balay PetscFree(b->a); \ 19274c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 19374c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 19474c639caSSatish Balay b->singlemalloc = 1; \ 195ac7a638eSSatish Balay \ 196ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 197ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 19874c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 19974c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 20074c639caSSatish Balay b->reallocs++; \ 20174c639caSSatish Balay b->nz++; \ 202ac7a638eSSatish Balay } \ 203ac7a638eSSatish Balay N = nrow++ - 1; \ 204ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 205ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 206ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 207ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 208ac7a638eSSatish Balay } \ 209ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 210ac7a638eSSatish Balay rp[_i] = bcol; \ 211ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 212ac7a638eSSatish Balay b_noinsert:; \ 213ac7a638eSSatish Balay bilen[brow] = nrow; \ 214ac7a638eSSatish Balay } 215ac7a638eSSatish Balay 2165615d1e5SSatish Balay #undef __FUNC__ 2175615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 218ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 21957b952d6SSatish Balay { 22057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22157b952d6SSatish Balay Scalar value; 2224fa0d573SSatish Balay int ierr,i,j,row,col; 2234fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2244fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2254fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 22657b952d6SSatish Balay 227eada6651SSatish Balay /* Some Variables required in the macro */ 22880c1aa95SSatish Balay Mat A = baij->A; 22980c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 230ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 231ac7a638eSSatish Balay Scalar *aa=a->a; 232ac7a638eSSatish Balay 233ac7a638eSSatish Balay Mat B = baij->B; 234ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 235ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 236ac7a638eSSatish Balay Scalar *ba=b->a; 237ac7a638eSSatish Balay 238ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 239ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 240ac7a638eSSatish Balay Scalar *ap,*bap; 24180c1aa95SSatish Balay 24257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 243639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 244e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 245e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 246639f9d9dSBarry Smith #endif 24757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 24857b952d6SSatish Balay row = im[i] - rstart_orig; 24957b952d6SSatish Balay for ( j=0; j<n; j++ ) { 25057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 25157b952d6SSatish Balay col = in[j] - cstart_orig; 25257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 253f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 25480c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25557b952d6SSatish Balay } 256639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 257e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 258e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 259639f9d9dSBarry Smith #endif 26057b952d6SSatish Balay else { 26157b952d6SSatish Balay if (mat->was_assembled) { 262905e6a2fSBarry Smith if (!baij->colmap) { 263905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 264905e6a2fSBarry Smith } 265905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 26657b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 26757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 26857b952d6SSatish Balay col = in[j]; 26957b952d6SSatish Balay } 27057b952d6SSatish Balay } 27157b952d6SSatish Balay else col = in[j]; 27257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 273ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 274ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 27557b952d6SSatish Balay } 27657b952d6SSatish Balay } 27757b952d6SSatish Balay } 27857b952d6SSatish Balay else { 27990f02eecSBarry Smith if (roworiented && !baij->donotstash) { 28057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 28157b952d6SSatish Balay } 28257b952d6SSatish Balay else { 28390f02eecSBarry Smith if (!baij->donotstash) { 28457b952d6SSatish Balay row = im[i]; 28557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 28657b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 28757b952d6SSatish Balay } 28857b952d6SSatish Balay } 28957b952d6SSatish Balay } 29057b952d6SSatish Balay } 29190f02eecSBarry Smith } 29257b952d6SSatish Balay return 0; 29357b952d6SSatish Balay } 29457b952d6SSatish Balay 295ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 296ab26458aSBarry Smith #undef __FUNC__ 297ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 298ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 299ab26458aSBarry Smith { 300ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 30130793edcSSatish Balay Scalar *value,*barray=baij->barray; 302abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 303ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 304ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 305ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 306ab26458aSBarry Smith 30730793edcSSatish Balay 30830793edcSSatish Balay if(!barray) { 30930793edcSSatish Balay barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 31030793edcSSatish Balay baij->barray = barray; 31130793edcSSatish Balay } 31230793edcSSatish Balay 313ab26458aSBarry Smith if (roworiented) { 314ab26458aSBarry Smith stepval = (n-1)*bs; 315ab26458aSBarry Smith } else { 316ab26458aSBarry Smith stepval = (m-1)*bs; 317ab26458aSBarry Smith } 318ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 319ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 320ab26458aSBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 321ab26458aSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 322ab26458aSBarry Smith #endif 323ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 324ab26458aSBarry Smith row = im[i] - rstart; 325ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 32615b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 32715b57d14SSatish Balay if ((roworiented) && (n == 1)) { 32815b57d14SSatish Balay barray = v + i*bs2; 32915b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 33015b57d14SSatish Balay barray = v + j*bs2; 33115b57d14SSatish Balay } else { /* Here a copy is required */ 332ab26458aSBarry Smith if (roworiented) { 333ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 334ab26458aSBarry Smith } else { 335ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 336abef11f7SSatish Balay } 337ab26458aSBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) 338ab26458aSBarry Smith for (jj=0; jj<bs; jj++ ) 33930793edcSSatish Balay *barray++ = *value++; 34030793edcSSatish Balay barray -=bs2; 34115b57d14SSatish Balay } 342abef11f7SSatish Balay 343abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 344abef11f7SSatish Balay col = in[j] - cstart; 34530793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 346ab26458aSBarry Smith } 347ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 348ab26458aSBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 349ab26458aSBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");} 350ab26458aSBarry Smith #endif 351ab26458aSBarry Smith else { 352ab26458aSBarry Smith if (mat->was_assembled) { 353ab26458aSBarry Smith if (!baij->colmap) { 354ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 355ab26458aSBarry Smith } 356ab26458aSBarry Smith col = baij->colmap[in[j]] - 1; 357ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 359ab26458aSBarry Smith col = in[j]; 360ab26458aSBarry Smith } 361ab26458aSBarry Smith } 362ab26458aSBarry Smith else col = in[j]; 36330793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364ab26458aSBarry Smith } 365ab26458aSBarry Smith } 366ab26458aSBarry Smith } 367ab26458aSBarry Smith else { 368ab26458aSBarry Smith if (!baij->donotstash) { 369ab26458aSBarry Smith if (roworiented ) { 370abef11f7SSatish Balay row = im[i]*bs; 371abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 372abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 373abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 374abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 375abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 376abef11f7SSatish Balay } 377ab26458aSBarry Smith } 378ab26458aSBarry Smith } 379ab26458aSBarry Smith } 380ab26458aSBarry Smith else { 381ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 382abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 383abef11f7SSatish Balay col = in[j]*bs; 384abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 385abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 386abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 387ab26458aSBarry Smith } 388ab26458aSBarry Smith } 389ab26458aSBarry Smith } 390abef11f7SSatish Balay 391abef11f7SSatish Balay } 392abef11f7SSatish Balay } 393ab26458aSBarry Smith } 394ab26458aSBarry Smith } 395ab26458aSBarry Smith return 0; 396ab26458aSBarry Smith } 397ab26458aSBarry Smith 3985615d1e5SSatish Balay #undef __FUNC__ 3995615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 400ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 401d6de1c52SSatish Balay { 402d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 403d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 404d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 405d6de1c52SSatish Balay 406d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 407e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 408e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 409d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 410d6de1c52SSatish Balay row = idxm[i] - bsrstart; 411d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 412e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 413e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 414d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 415d6de1c52SSatish Balay col = idxn[j] - bscstart; 416d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 417d6de1c52SSatish Balay } 418d6de1c52SSatish Balay else { 419905e6a2fSBarry Smith if (!baij->colmap) { 420905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 421905e6a2fSBarry Smith } 422e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 423dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 424d9d09a02SSatish Balay else { 425dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 426d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 427d6de1c52SSatish Balay } 428d6de1c52SSatish Balay } 429d6de1c52SSatish Balay } 430d9d09a02SSatish Balay } 431d6de1c52SSatish Balay else { 432e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 433d6de1c52SSatish Balay } 434d6de1c52SSatish Balay } 435d6de1c52SSatish Balay return 0; 436d6de1c52SSatish Balay } 437d6de1c52SSatish Balay 4385615d1e5SSatish Balay #undef __FUNC__ 4395615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 440ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 441d6de1c52SSatish Balay { 442d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 443d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 444acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 445d6de1c52SSatish Balay double sum = 0.0; 446d6de1c52SSatish Balay Scalar *v; 447d6de1c52SSatish Balay 448d6de1c52SSatish Balay if (baij->size == 1) { 449d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 450d6de1c52SSatish Balay } else { 451d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 452d6de1c52SSatish Balay v = amat->a; 453d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 454d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 455d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 456d6de1c52SSatish Balay #else 457d6de1c52SSatish Balay sum += (*v)*(*v); v++; 458d6de1c52SSatish Balay #endif 459d6de1c52SSatish Balay } 460d6de1c52SSatish Balay v = bmat->a; 461d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 462d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 463d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 464d6de1c52SSatish Balay #else 465d6de1c52SSatish Balay sum += (*v)*(*v); v++; 466d6de1c52SSatish Balay #endif 467d6de1c52SSatish Balay } 468d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 469d6de1c52SSatish Balay *norm = sqrt(*norm); 470d6de1c52SSatish Balay } 471acdf5bf4SSatish Balay else 472e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 473d6de1c52SSatish Balay } 474d6de1c52SSatish Balay return 0; 475d6de1c52SSatish Balay } 47657b952d6SSatish Balay 4775615d1e5SSatish Balay #undef __FUNC__ 4785615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 479ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 48057b952d6SSatish Balay { 48157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 48257b952d6SSatish Balay MPI_Comm comm = mat->comm; 48357b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 48457b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 48557b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 48657b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 48757b952d6SSatish Balay InsertMode addv; 48857b952d6SSatish Balay Scalar *rvalues,*svalues; 48957b952d6SSatish Balay 49057b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 491e0fa3b82SLois Curfman McInnes MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 49257b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 493e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 49457b952d6SSatish Balay } 495e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 49657b952d6SSatish Balay 49757b952d6SSatish Balay /* first count number of contributors to each processor */ 49857b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 49957b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 50057b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 50157b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 50257b952d6SSatish Balay idx = baij->stash.idx[i]; 50357b952d6SSatish Balay for ( j=0; j<size; j++ ) { 50457b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 50557b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 50657b952d6SSatish Balay } 50757b952d6SSatish Balay } 50857b952d6SSatish Balay } 50957b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 51057b952d6SSatish Balay 51157b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 51257b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 51357b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 51457b952d6SSatish Balay nreceives = work[rank]; 51557b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 51657b952d6SSatish Balay nmax = work[rank]; 51757b952d6SSatish Balay PetscFree(work); 51857b952d6SSatish Balay 51957b952d6SSatish Balay /* post receives: 52057b952d6SSatish Balay 1) each message will consist of ordered pairs 52157b952d6SSatish Balay (global index,value) we store the global index as a double 52257b952d6SSatish Balay to simplify the message passing. 52357b952d6SSatish Balay 2) since we don't know how long each individual message is we 52457b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 52557b952d6SSatish Balay this is a lot of wasted space. 52657b952d6SSatish Balay 52757b952d6SSatish Balay 52857b952d6SSatish Balay This could be done better. 52957b952d6SSatish Balay */ 53057b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 53157b952d6SSatish Balay CHKPTRQ(rvalues); 53257b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 53357b952d6SSatish Balay CHKPTRQ(recv_waits); 53457b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 53557b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 53657b952d6SSatish Balay comm,recv_waits+i); 53757b952d6SSatish Balay } 53857b952d6SSatish Balay 53957b952d6SSatish Balay /* do sends: 54057b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 54157b952d6SSatish Balay the ith processor 54257b952d6SSatish Balay */ 54357b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 54457b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 54557b952d6SSatish Balay CHKPTRQ(send_waits); 54657b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 54757b952d6SSatish Balay starts[0] = 0; 54857b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 54957b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 55057b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 55157b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 55257b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 55357b952d6SSatish Balay } 55457b952d6SSatish Balay PetscFree(owner); 55557b952d6SSatish Balay starts[0] = 0; 55657b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 55757b952d6SSatish Balay count = 0; 55857b952d6SSatish Balay for ( i=0; i<size; i++ ) { 55957b952d6SSatish Balay if (procs[i]) { 56057b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 56157b952d6SSatish Balay comm,send_waits+count++); 56257b952d6SSatish Balay } 56357b952d6SSatish Balay } 56457b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 56557b952d6SSatish Balay 56657b952d6SSatish Balay /* Free cache space */ 567d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 56857b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 56957b952d6SSatish Balay 57057b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 57157b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 57257b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 57357b952d6SSatish Balay baij->rmax = nmax; 57457b952d6SSatish Balay 57557b952d6SSatish Balay return 0; 57657b952d6SSatish Balay } 57757b952d6SSatish Balay 57857b952d6SSatish Balay 5795615d1e5SSatish Balay #undef __FUNC__ 5805615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 581ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 58257b952d6SSatish Balay { 58357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 58457b952d6SSatish Balay MPI_Status *send_status,recv_status; 58557b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 58657b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 58757b952d6SSatish Balay Scalar *values,val; 588e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 58957b952d6SSatish Balay 59057b952d6SSatish Balay /* wait on receives */ 59157b952d6SSatish Balay while (count) { 59257b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 59357b952d6SSatish Balay /* unpack receives into our local space */ 59457b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 59557b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 59657b952d6SSatish Balay n = n/3; 59757b952d6SSatish Balay for ( i=0; i<n; i++ ) { 59857b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 59957b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 60057b952d6SSatish Balay val = values[3*i+2]; 60157b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 60257b952d6SSatish Balay col -= baij->cstart*bs; 60357b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 60457b952d6SSatish Balay } 60557b952d6SSatish Balay else { 60657b952d6SSatish Balay if (mat->was_assembled) { 607905e6a2fSBarry Smith if (!baij->colmap) { 608905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 609905e6a2fSBarry Smith } 610905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 61157b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 61257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 61357b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 61457b952d6SSatish Balay } 61557b952d6SSatish Balay } 61657b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 61757b952d6SSatish Balay } 61857b952d6SSatish Balay } 61957b952d6SSatish Balay count--; 62057b952d6SSatish Balay } 62157b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 62257b952d6SSatish Balay 62357b952d6SSatish Balay /* wait on sends */ 62457b952d6SSatish Balay if (baij->nsends) { 62557b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 62657b952d6SSatish Balay CHKPTRQ(send_status); 62757b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 62857b952d6SSatish Balay PetscFree(send_status); 62957b952d6SSatish Balay } 63057b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 63157b952d6SSatish Balay 63257b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 63357b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 63457b952d6SSatish Balay 63557b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 63657b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 63757b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 63857b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 63957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 64057b952d6SSatish Balay } 64157b952d6SSatish Balay 6426d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 64357b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 64457b952d6SSatish Balay } 64557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 64657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 64757b952d6SSatish Balay 64857b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 64957b952d6SSatish Balay return 0; 65057b952d6SSatish Balay } 65157b952d6SSatish Balay 6525615d1e5SSatish Balay #undef __FUNC__ 6535615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 65457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 65557b952d6SSatish Balay { 65657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 65757b952d6SSatish Balay int ierr; 65857b952d6SSatish Balay 65957b952d6SSatish Balay if (baij->size == 1) { 66057b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 66157b952d6SSatish Balay } 662e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 66357b952d6SSatish Balay return 0; 66457b952d6SSatish Balay } 66557b952d6SSatish Balay 6665615d1e5SSatish Balay #undef __FUNC__ 6675615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 66857b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 66957b952d6SSatish Balay { 67057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 671cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 67257b952d6SSatish Balay FILE *fd; 67357b952d6SSatish Balay ViewerType vtype; 67457b952d6SSatish Balay 67557b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 67657b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 67757b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 678639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 6794e220ebcSLois Curfman McInnes MatInfo info; 68057b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 68157b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 6824e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 68357b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 68457b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 6854e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 6864e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 6874e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 6884e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 6894e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 6904e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 69157b952d6SSatish Balay fflush(fd); 69257b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 69357b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 69457b952d6SSatish Balay return 0; 69557b952d6SSatish Balay } 696639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 697bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 69857b952d6SSatish Balay return 0; 69957b952d6SSatish Balay } 70057b952d6SSatish Balay } 70157b952d6SSatish Balay 70257b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 70357b952d6SSatish Balay Draw draw; 70457b952d6SSatish Balay PetscTruth isnull; 70557b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 70657b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 70757b952d6SSatish Balay } 70857b952d6SSatish Balay 70957b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 71057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 71157b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 71257b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 71357b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 71457b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 71557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 71657b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 71757b952d6SSatish Balay fflush(fd); 71857b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 71957b952d6SSatish Balay } 72057b952d6SSatish Balay else { 72157b952d6SSatish Balay int size = baij->size; 72257b952d6SSatish Balay rank = baij->rank; 72357b952d6SSatish Balay if (size == 1) { 72457b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 72557b952d6SSatish Balay } 72657b952d6SSatish Balay else { 72757b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 72857b952d6SSatish Balay Mat A; 72957b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 73057b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 73157b952d6SSatish Balay int mbs=baij->mbs; 73257b952d6SSatish Balay Scalar *a; 73357b952d6SSatish Balay 73457b952d6SSatish Balay if (!rank) { 735cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 73657b952d6SSatish Balay CHKERRQ(ierr); 73757b952d6SSatish Balay } 73857b952d6SSatish Balay else { 739cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 74057b952d6SSatish Balay CHKERRQ(ierr); 74157b952d6SSatish Balay } 74257b952d6SSatish Balay PLogObjectParent(mat,A); 74357b952d6SSatish Balay 74457b952d6SSatish Balay /* copy over the A part */ 74557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 74657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 74757b952d6SSatish Balay row = baij->rstart; 74857b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 74957b952d6SSatish Balay 75057b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 75157b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 75257b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 75357b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 75457b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 75557b952d6SSatish Balay for (k=0; k<bs; k++ ) { 756cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 757cee3aa6bSSatish Balay col++; a += bs; 75857b952d6SSatish Balay } 75957b952d6SSatish Balay } 76057b952d6SSatish Balay } 76157b952d6SSatish Balay /* copy over the B part */ 76257b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 76357b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 76457b952d6SSatish Balay row = baij->rstart*bs; 76557b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 76657b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 76757b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 76857b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 76957b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 77057b952d6SSatish Balay for (k=0; k<bs; k++ ) { 771cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 772cee3aa6bSSatish Balay col++; a += bs; 77357b952d6SSatish Balay } 77457b952d6SSatish Balay } 77557b952d6SSatish Balay } 77657b952d6SSatish Balay PetscFree(rvals); 7776d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7786d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 77957b952d6SSatish Balay if (!rank) { 78057b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 78157b952d6SSatish Balay } 78257b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 78357b952d6SSatish Balay } 78457b952d6SSatish Balay } 78557b952d6SSatish Balay return 0; 78657b952d6SSatish Balay } 78757b952d6SSatish Balay 78857b952d6SSatish Balay 78957b952d6SSatish Balay 7905615d1e5SSatish Balay #undef __FUNC__ 7915615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 792ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 79357b952d6SSatish Balay { 79457b952d6SSatish Balay Mat mat = (Mat) obj; 79557b952d6SSatish Balay int ierr; 79657b952d6SSatish Balay ViewerType vtype; 79757b952d6SSatish Balay 79857b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 79957b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 80057b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 80157b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 80257b952d6SSatish Balay } 80357b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 80457b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 80557b952d6SSatish Balay } 80657b952d6SSatish Balay return 0; 80757b952d6SSatish Balay } 80857b952d6SSatish Balay 8095615d1e5SSatish Balay #undef __FUNC__ 8105615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 811ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 81279bdfe76SSatish Balay { 81379bdfe76SSatish Balay Mat mat = (Mat) obj; 81479bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 81579bdfe76SSatish Balay int ierr; 81679bdfe76SSatish Balay 81779bdfe76SSatish Balay #if defined(PETSC_LOG) 81879bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 81979bdfe76SSatish Balay #endif 82079bdfe76SSatish Balay 82179bdfe76SSatish Balay PetscFree(baij->rowners); 82279bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 82379bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 82479bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 82579bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 82679bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 82779bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 82879bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 82930793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 83079bdfe76SSatish Balay PetscFree(baij); 83190f02eecSBarry Smith if (mat->mapping) { 83290f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 83390f02eecSBarry Smith } 83479bdfe76SSatish Balay PLogObjectDestroy(mat); 83579bdfe76SSatish Balay PetscHeaderDestroy(mat); 83679bdfe76SSatish Balay return 0; 83779bdfe76SSatish Balay } 83879bdfe76SSatish Balay 8395615d1e5SSatish Balay #undef __FUNC__ 8405615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 841ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 842cee3aa6bSSatish Balay { 843cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 84447b4a8eaSLois Curfman McInnes int ierr, nt; 845cee3aa6bSSatish Balay 846c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 84747b4a8eaSLois Curfman McInnes if (nt != a->n) { 848ab26458aSBarry Smith SETERRQ(1,0,"Incompatible partition of A and xx"); 84947b4a8eaSLois Curfman McInnes } 850c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 85147b4a8eaSLois Curfman McInnes if (nt != a->m) { 852e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 85347b4a8eaSLois Curfman McInnes } 85443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 855cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 85643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 857cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 85843a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 859cee3aa6bSSatish Balay return 0; 860cee3aa6bSSatish Balay } 861cee3aa6bSSatish Balay 8625615d1e5SSatish Balay #undef __FUNC__ 8635615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 864ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 865cee3aa6bSSatish Balay { 866cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 867cee3aa6bSSatish Balay int ierr; 86843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 869cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 87043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 871cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 872cee3aa6bSSatish Balay return 0; 873cee3aa6bSSatish Balay } 874cee3aa6bSSatish Balay 8755615d1e5SSatish Balay #undef __FUNC__ 8765615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 877ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 878cee3aa6bSSatish Balay { 879cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 880cee3aa6bSSatish Balay int ierr; 881cee3aa6bSSatish Balay 882cee3aa6bSSatish Balay /* do nondiagonal part */ 883cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 884cee3aa6bSSatish Balay /* send it on its way */ 885537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 886cee3aa6bSSatish Balay /* do local part */ 887cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 888cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 889cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 890cee3aa6bSSatish Balay /* but is not perhaps always true. */ 891639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 892cee3aa6bSSatish Balay return 0; 893cee3aa6bSSatish Balay } 894cee3aa6bSSatish Balay 8955615d1e5SSatish Balay #undef __FUNC__ 8965615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 897ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 898cee3aa6bSSatish Balay { 899cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 900cee3aa6bSSatish Balay int ierr; 901cee3aa6bSSatish Balay 902cee3aa6bSSatish Balay /* do nondiagonal part */ 903cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 904cee3aa6bSSatish Balay /* send it on its way */ 905537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 906cee3aa6bSSatish Balay /* do local part */ 907cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 908cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 909cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 910cee3aa6bSSatish Balay /* but is not perhaps always true. */ 911537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 912cee3aa6bSSatish Balay return 0; 913cee3aa6bSSatish Balay } 914cee3aa6bSSatish Balay 915cee3aa6bSSatish Balay /* 916cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 917cee3aa6bSSatish Balay diagonal block 918cee3aa6bSSatish Balay */ 9195615d1e5SSatish Balay #undef __FUNC__ 9205615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 921ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 922cee3aa6bSSatish Balay { 923cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 924cee3aa6bSSatish Balay if (a->M != a->N) 925e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 926cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 927cee3aa6bSSatish Balay } 928cee3aa6bSSatish Balay 9295615d1e5SSatish Balay #undef __FUNC__ 9305615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 931ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 932cee3aa6bSSatish Balay { 933cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 934cee3aa6bSSatish Balay int ierr; 935cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 936cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 937cee3aa6bSSatish Balay return 0; 938cee3aa6bSSatish Balay } 939026e39d0SSatish Balay 9405615d1e5SSatish Balay #undef __FUNC__ 9415615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 942ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 94357b952d6SSatish Balay { 94457b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 94557b952d6SSatish Balay *m = mat->M; *n = mat->N; 94657b952d6SSatish Balay return 0; 94757b952d6SSatish Balay } 94857b952d6SSatish Balay 9495615d1e5SSatish Balay #undef __FUNC__ 9505615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 951ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 95257b952d6SSatish Balay { 95357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 95457b952d6SSatish Balay *m = mat->m; *n = mat->N; 95557b952d6SSatish Balay return 0; 95657b952d6SSatish Balay } 95757b952d6SSatish Balay 9585615d1e5SSatish Balay #undef __FUNC__ 9595615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 960ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 96157b952d6SSatish Balay { 96257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 96357b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 96457b952d6SSatish Balay return 0; 96557b952d6SSatish Balay } 96657b952d6SSatish Balay 967acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 968acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 969acdf5bf4SSatish Balay 9705615d1e5SSatish Balay #undef __FUNC__ 9715615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 972acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 973acdf5bf4SSatish Balay { 974acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 975acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 976acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 977d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 978d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 979acdf5bf4SSatish Balay 980e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 981acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 982acdf5bf4SSatish Balay 983acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 984acdf5bf4SSatish Balay /* 985acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 986acdf5bf4SSatish Balay */ 987acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 988bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 989bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 990acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 991acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 992acdf5bf4SSatish Balay } 993acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 994acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 995acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 996acdf5bf4SSatish Balay } 997acdf5bf4SSatish Balay 998acdf5bf4SSatish Balay 999e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 1000d9d09a02SSatish Balay lrow = row - brstart; 1001acdf5bf4SSatish Balay 1002acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1003acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1004acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1005acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1006acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1007acdf5bf4SSatish Balay nztot = nzA + nzB; 1008acdf5bf4SSatish Balay 1009acdf5bf4SSatish Balay cmap = mat->garray; 1010acdf5bf4SSatish Balay if (v || idx) { 1011acdf5bf4SSatish Balay if (nztot) { 1012acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1013acdf5bf4SSatish Balay int imark = -1; 1014acdf5bf4SSatish Balay if (v) { 1015acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1016acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1017d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1018acdf5bf4SSatish Balay else break; 1019acdf5bf4SSatish Balay } 1020acdf5bf4SSatish Balay imark = i; 1021acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1022acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1023acdf5bf4SSatish Balay } 1024acdf5bf4SSatish Balay if (idx) { 1025acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1026acdf5bf4SSatish Balay if (imark > -1) { 1027acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1028bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1029acdf5bf4SSatish Balay } 1030acdf5bf4SSatish Balay } else { 1031acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1032d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1033d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1034acdf5bf4SSatish Balay else break; 1035acdf5bf4SSatish Balay } 1036acdf5bf4SSatish Balay imark = i; 1037acdf5bf4SSatish Balay } 1038d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1039d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1040acdf5bf4SSatish Balay } 1041acdf5bf4SSatish Balay } 1042d212a18eSSatish Balay else { 1043d212a18eSSatish Balay if (idx) *idx = 0; 1044d212a18eSSatish Balay if (v) *v = 0; 1045d212a18eSSatish Balay } 1046acdf5bf4SSatish Balay } 1047acdf5bf4SSatish Balay *nz = nztot; 1048acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1049acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1050acdf5bf4SSatish Balay return 0; 1051acdf5bf4SSatish Balay } 1052acdf5bf4SSatish Balay 10535615d1e5SSatish Balay #undef __FUNC__ 10545615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1055acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1056acdf5bf4SSatish Balay { 1057acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1058acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1059e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 1060acdf5bf4SSatish Balay } 1061acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 1062acdf5bf4SSatish Balay return 0; 1063acdf5bf4SSatish Balay } 1064acdf5bf4SSatish Balay 10655615d1e5SSatish Balay #undef __FUNC__ 10665615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1067ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 10685a838052SSatish Balay { 10695a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 10705a838052SSatish Balay *bs = baij->bs; 10715a838052SSatish Balay return 0; 10725a838052SSatish Balay } 10735a838052SSatish Balay 10745615d1e5SSatish Balay #undef __FUNC__ 10755615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1076ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 107758667388SSatish Balay { 107858667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 107958667388SSatish Balay int ierr; 108058667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 108158667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 108258667388SSatish Balay return 0; 108358667388SSatish Balay } 10840ac07820SSatish Balay 10855615d1e5SSatish Balay #undef __FUNC__ 10865615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1087ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 10880ac07820SSatish Balay { 10894e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 10904e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 10917d57db60SLois Curfman McInnes int ierr; 10927d57db60SLois Curfman McInnes double isend[5], irecv[5]; 10930ac07820SSatish Balay 10944e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 10954e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 10964e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 10974e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 10984e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 10994e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 11004e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 11014e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 11024e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 11030ac07820SSatish Balay if (flag == MAT_LOCAL) { 11044e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 11054e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 11064e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11074e220ebcSLois Curfman McInnes info->memory = isend[3]; 11084e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 11090ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1110dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 11114e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11124e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11134e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11144e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11154e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11160ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1117dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 11184e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11194e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11204e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11214e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11224e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11230ac07820SSatish Balay } 11244e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 11254e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 11264e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 11270ac07820SSatish Balay return 0; 11280ac07820SSatish Balay } 11290ac07820SSatish Balay 11305615d1e5SSatish Balay #undef __FUNC__ 11315615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1132ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 113358667388SSatish Balay { 113458667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 113558667388SSatish Balay 113658667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 113758667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 11386da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1139c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 1140c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1141b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1142b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1143b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1144aeafbbfcSLois Curfman McInnes a->roworiented = 1; 114558667388SSatish Balay MatSetOption(a->A,op); 114658667388SSatish Balay MatSetOption(a->B,op); 1147b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 11486da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 114958667388SSatish Balay op == MAT_SYMMETRIC || 115058667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 115158667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 115258667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 115358667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 115458667388SSatish Balay a->roworiented = 0; 115558667388SSatish Balay MatSetOption(a->A,op); 115658667388SSatish Balay MatSetOption(a->B,op); 1157*2b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 115890f02eecSBarry Smith a->donotstash = 1; 115990f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1160e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 116158667388SSatish Balay else 1162e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 116358667388SSatish Balay return 0; 116458667388SSatish Balay } 116558667388SSatish Balay 11665615d1e5SSatish Balay #undef __FUNC__ 11675615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1168ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 11690ac07820SSatish Balay { 11700ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 11710ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 11720ac07820SSatish Balay Mat B; 11730ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 11740ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 11750ac07820SSatish Balay Scalar *a; 11760ac07820SSatish Balay 11770ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 1178e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 11790ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 11800ac07820SSatish Balay CHKERRQ(ierr); 11810ac07820SSatish Balay 11820ac07820SSatish Balay /* copy over the A part */ 11830ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 11840ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 11850ac07820SSatish Balay row = baij->rstart; 11860ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 11870ac07820SSatish Balay 11880ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 11890ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 11900ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 11910ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 11920ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 11930ac07820SSatish Balay for (k=0; k<bs; k++ ) { 11940ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 11950ac07820SSatish Balay col++; a += bs; 11960ac07820SSatish Balay } 11970ac07820SSatish Balay } 11980ac07820SSatish Balay } 11990ac07820SSatish Balay /* copy over the B part */ 12000ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 12010ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12020ac07820SSatish Balay row = baij->rstart*bs; 12030ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12040ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12050ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12060ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12070ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 12080ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12090ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12100ac07820SSatish Balay col++; a += bs; 12110ac07820SSatish Balay } 12120ac07820SSatish Balay } 12130ac07820SSatish Balay } 12140ac07820SSatish Balay PetscFree(rvals); 12150ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12160ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12170ac07820SSatish Balay 12180ac07820SSatish Balay if (matout != PETSC_NULL) { 12190ac07820SSatish Balay *matout = B; 12200ac07820SSatish Balay } else { 12210ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 12220ac07820SSatish Balay PetscFree(baij->rowners); 12230ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 12240ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 12250ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 12260ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 12270ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 12280ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 12290ac07820SSatish Balay PetscFree(baij); 12300ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 12310ac07820SSatish Balay PetscHeaderDestroy(B); 12320ac07820SSatish Balay } 12330ac07820SSatish Balay return 0; 12340ac07820SSatish Balay } 12350e95ebc0SSatish Balay 12365615d1e5SSatish Balay #undef __FUNC__ 12375615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 12380e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 12390e95ebc0SSatish Balay { 12400e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 12410e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 12420e95ebc0SSatish Balay int ierr,s1,s2,s3; 12430e95ebc0SSatish Balay 12440e95ebc0SSatish Balay if (ll) { 12450e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 12460e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1247e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 12480e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 12490e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 12500e95ebc0SSatish Balay } 1251e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 12520e95ebc0SSatish Balay return 0; 12530e95ebc0SSatish Balay } 12540e95ebc0SSatish Balay 12550ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 12560ac07820SSatish Balay matrix is square and the column and row owerships are identical. 12570ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 12580ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 12590ac07820SSatish Balay routine. 12600ac07820SSatish Balay */ 12615615d1e5SSatish Balay #undef __FUNC__ 12625615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1263ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 12640ac07820SSatish Balay { 12650ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 12660ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 12670ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 12680ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 12690ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 12700ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 12710ac07820SSatish Balay MPI_Comm comm = A->comm; 12720ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 12730ac07820SSatish Balay MPI_Status recv_status,*send_status; 12740ac07820SSatish Balay IS istmp; 12750ac07820SSatish Balay 12760ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 12770ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 12780ac07820SSatish Balay 12790ac07820SSatish Balay /* first count number of contributors to each processor */ 12800ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 12810ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 12820ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 12830ac07820SSatish Balay for ( i=0; i<N; i++ ) { 12840ac07820SSatish Balay idx = rows[i]; 12850ac07820SSatish Balay found = 0; 12860ac07820SSatish Balay for ( j=0; j<size; j++ ) { 12870ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 12880ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 12890ac07820SSatish Balay } 12900ac07820SSatish Balay } 1291e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 12920ac07820SSatish Balay } 12930ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 12940ac07820SSatish Balay 12950ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 12960ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 12970ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 12980ac07820SSatish Balay nrecvs = work[rank]; 12990ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 13000ac07820SSatish Balay nmax = work[rank]; 13010ac07820SSatish Balay PetscFree(work); 13020ac07820SSatish Balay 13030ac07820SSatish Balay /* post receives: */ 13040ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 13050ac07820SSatish Balay CHKPTRQ(rvalues); 13060ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 13070ac07820SSatish Balay CHKPTRQ(recv_waits); 13080ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13090ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 13100ac07820SSatish Balay } 13110ac07820SSatish Balay 13120ac07820SSatish Balay /* do sends: 13130ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 13140ac07820SSatish Balay the ith processor 13150ac07820SSatish Balay */ 13160ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 13170ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 13180ac07820SSatish Balay CHKPTRQ(send_waits); 13190ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 13200ac07820SSatish Balay starts[0] = 0; 13210ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13220ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13230ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 13240ac07820SSatish Balay } 13250ac07820SSatish Balay ISRestoreIndices(is,&rows); 13260ac07820SSatish Balay 13270ac07820SSatish Balay starts[0] = 0; 13280ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13290ac07820SSatish Balay count = 0; 13300ac07820SSatish Balay for ( i=0; i<size; i++ ) { 13310ac07820SSatish Balay if (procs[i]) { 13320ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 13330ac07820SSatish Balay } 13340ac07820SSatish Balay } 13350ac07820SSatish Balay PetscFree(starts); 13360ac07820SSatish Balay 13370ac07820SSatish Balay base = owners[rank]*bs; 13380ac07820SSatish Balay 13390ac07820SSatish Balay /* wait on receives */ 13400ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 13410ac07820SSatish Balay source = lens + nrecvs; 13420ac07820SSatish Balay count = nrecvs; slen = 0; 13430ac07820SSatish Balay while (count) { 13440ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 13450ac07820SSatish Balay /* unpack receives into our local space */ 13460ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 13470ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 13480ac07820SSatish Balay lens[imdex] = n; 13490ac07820SSatish Balay slen += n; 13500ac07820SSatish Balay count--; 13510ac07820SSatish Balay } 13520ac07820SSatish Balay PetscFree(recv_waits); 13530ac07820SSatish Balay 13540ac07820SSatish Balay /* move the data into the send scatter */ 13550ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 13560ac07820SSatish Balay count = 0; 13570ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13580ac07820SSatish Balay values = rvalues + i*nmax; 13590ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 13600ac07820SSatish Balay lrows[count++] = values[j] - base; 13610ac07820SSatish Balay } 13620ac07820SSatish Balay } 13630ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 13640ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 13650ac07820SSatish Balay 13660ac07820SSatish Balay /* actually zap the local rows */ 1367537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 13680ac07820SSatish Balay PLogObjectParent(A,istmp); 13690ac07820SSatish Balay PetscFree(lrows); 13700ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 13710ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 13720ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 13730ac07820SSatish Balay 13740ac07820SSatish Balay /* wait on sends */ 13750ac07820SSatish Balay if (nsends) { 13760ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 13770ac07820SSatish Balay CHKPTRQ(send_status); 13780ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 13790ac07820SSatish Balay PetscFree(send_status); 13800ac07820SSatish Balay } 13810ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 13820ac07820SSatish Balay 13830ac07820SSatish Balay return 0; 13840ac07820SSatish Balay } 1385ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 13865615d1e5SSatish Balay #undef __FUNC__ 13875615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1388ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1389ba4ca20aSSatish Balay { 1390ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1391ba4ca20aSSatish Balay 1392ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1393ba4ca20aSSatish Balay else return 0; 1394ba4ca20aSSatish Balay } 13950ac07820SSatish Balay 13965615d1e5SSatish Balay #undef __FUNC__ 13975615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1398ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1399bb5a7306SBarry Smith { 1400bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1401bb5a7306SBarry Smith int ierr; 1402bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1403bb5a7306SBarry Smith return 0; 1404bb5a7306SBarry Smith } 1405bb5a7306SBarry Smith 14060ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 14070ac07820SSatish Balay 140879bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 140979bdfe76SSatish Balay static struct _MatOps MatOps = { 1410bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 14114c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 14124c50302cSBarry Smith 0,0,0,0, 14130ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 14140e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 141558667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 14164c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 14174c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 14184c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 141994a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1420d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1421ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1422bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1423ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 142479bdfe76SSatish Balay 142579bdfe76SSatish Balay 14265615d1e5SSatish Balay #undef __FUNC__ 14275615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 142879bdfe76SSatish Balay /*@C 142979bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 143079bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 143179bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 143279bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 143379bdfe76SSatish Balay performance can be increased by more than a factor of 50. 143479bdfe76SSatish Balay 143579bdfe76SSatish Balay Input Parameters: 143679bdfe76SSatish Balay . comm - MPI communicator 143779bdfe76SSatish Balay . bs - size of blockk 143879bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 143992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 144092e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 144192e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 144292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 144392e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 144479bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 144592e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 144679bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 144779bdfe76SSatish Balay submatrix (same for all local rows) 144892e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 144992e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 145092e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 145192e8d321SLois Curfman McInnes it is zero. 145292e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 145379bdfe76SSatish Balay submatrix (same for all local rows). 145492e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 145592e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 145692e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 145779bdfe76SSatish Balay 145879bdfe76SSatish Balay Output Parameter: 145979bdfe76SSatish Balay . A - the matrix 146079bdfe76SSatish Balay 146179bdfe76SSatish Balay Notes: 146279bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 146379bdfe76SSatish Balay (possibly both). 146479bdfe76SSatish Balay 146579bdfe76SSatish Balay Storage Information: 146679bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 146779bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 146879bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 146979bdfe76SSatish Balay local matrix (a rectangular submatrix). 147079bdfe76SSatish Balay 147179bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 147279bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 147379bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 147479bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 147579bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 147679bdfe76SSatish Balay 147779bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 147879bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 147979bdfe76SSatish Balay 148079bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 148179bdfe76SSatish Balay $ ------------------- 148279bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 148379bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 148479bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 148579bdfe76SSatish Balay $ ------------------- 148679bdfe76SSatish Balay $ 148779bdfe76SSatish Balay 148879bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 148979bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 149079bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 149157b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 149279bdfe76SSatish Balay 149379bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 149479bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 149579bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 149692e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 149792e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 14986da5968aSLois Curfman McInnes matrices. 149979bdfe76SSatish Balay 150092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 150179bdfe76SSatish Balay 150279bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 150379bdfe76SSatish Balay @*/ 150479bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 150579bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 150679bdfe76SSatish Balay { 150779bdfe76SSatish Balay Mat B; 150879bdfe76SSatish Balay Mat_MPIBAIJ *b; 15094c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 151079bdfe76SSatish Balay 1511e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 151279bdfe76SSatish Balay *A = 0; 151379bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 151479bdfe76SSatish Balay PLogObjectCreate(B); 151579bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 151679bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 151779bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 15184c50302cSBarry Smith MPI_Comm_size(comm,&size); 15194c50302cSBarry Smith if (size == 1) { 15204c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 15214c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 15224c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 15234c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 15244c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 15254c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 15264c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 15274c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 15284c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 15294c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 15304c50302cSBarry Smith } 15314c50302cSBarry Smith 153279bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 153379bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 153490f02eecSBarry Smith B->mapping = 0; 153579bdfe76SSatish Balay B->factor = 0; 153679bdfe76SSatish Balay B->assembled = PETSC_FALSE; 153779bdfe76SSatish Balay 1538e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 153979bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 154079bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 154179bdfe76SSatish Balay 154279bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1543e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1544e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1545e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1546cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1547cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 154879bdfe76SSatish Balay 154979bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 155079bdfe76SSatish Balay work[0] = m; work[1] = n; 155179bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 155279bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 155379bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 155479bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 155579bdfe76SSatish Balay } 155679bdfe76SSatish Balay if (m == PETSC_DECIDE) { 155779bdfe76SSatish Balay Mbs = M/bs; 1558e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 155979bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 156079bdfe76SSatish Balay m = mbs*bs; 156179bdfe76SSatish Balay } 156279bdfe76SSatish Balay if (n == PETSC_DECIDE) { 156379bdfe76SSatish Balay Nbs = N/bs; 1564e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 156579bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 156679bdfe76SSatish Balay n = nbs*bs; 156779bdfe76SSatish Balay } 1568e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 156979bdfe76SSatish Balay 157079bdfe76SSatish Balay b->m = m; B->m = m; 157179bdfe76SSatish Balay b->n = n; B->n = n; 157279bdfe76SSatish Balay b->N = N; B->N = N; 157379bdfe76SSatish Balay b->M = M; B->M = M; 157479bdfe76SSatish Balay b->bs = bs; 157579bdfe76SSatish Balay b->bs2 = bs*bs; 157679bdfe76SSatish Balay b->mbs = mbs; 157779bdfe76SSatish Balay b->nbs = nbs; 157879bdfe76SSatish Balay b->Mbs = Mbs; 157979bdfe76SSatish Balay b->Nbs = Nbs; 158079bdfe76SSatish Balay 158179bdfe76SSatish Balay /* build local table of row and column ownerships */ 158279bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 158379bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 15840ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 158579bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 158679bdfe76SSatish Balay b->rowners[0] = 0; 158779bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 158879bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 158979bdfe76SSatish Balay } 159079bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 159179bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 15924fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 15934fa0d573SSatish Balay b->rend_bs = b->rend * bs; 15944fa0d573SSatish Balay 159579bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 159679bdfe76SSatish Balay b->cowners[0] = 0; 159779bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 159879bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 159979bdfe76SSatish Balay } 160079bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 160179bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 16024fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 16034fa0d573SSatish Balay b->cend_bs = b->cend * bs; 160479bdfe76SSatish Balay 160579bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 160679bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 160779bdfe76SSatish Balay PLogObjectParent(B,b->A); 160879bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 160979bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 161079bdfe76SSatish Balay PLogObjectParent(B,b->B); 161179bdfe76SSatish Balay 161279bdfe76SSatish Balay /* build cache for off array entries formed */ 161379bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 161490f02eecSBarry Smith b->donotstash = 0; 161579bdfe76SSatish Balay b->colmap = 0; 161679bdfe76SSatish Balay b->garray = 0; 161779bdfe76SSatish Balay b->roworiented = 1; 161879bdfe76SSatish Balay 161930793edcSSatish Balay /* stuff used in block assembly */ 162030793edcSSatish Balay b->barray = 0; 162130793edcSSatish Balay 162279bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 162379bdfe76SSatish Balay b->lvec = 0; 162479bdfe76SSatish Balay b->Mvctx = 0; 162579bdfe76SSatish Balay 162679bdfe76SSatish Balay /* stuff for MatGetRow() */ 162779bdfe76SSatish Balay b->rowindices = 0; 162879bdfe76SSatish Balay b->rowvalues = 0; 162979bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 163079bdfe76SSatish Balay 163179bdfe76SSatish Balay *A = B; 163279bdfe76SSatish Balay return 0; 163379bdfe76SSatish Balay } 1634026e39d0SSatish Balay 16355615d1e5SSatish Balay #undef __FUNC__ 16365615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 16370ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 16380ac07820SSatish Balay { 16390ac07820SSatish Balay Mat mat; 16400ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 16410ac07820SSatish Balay int ierr, len=0, flg; 16420ac07820SSatish Balay 16430ac07820SSatish Balay *newmat = 0; 16440ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 16450ac07820SSatish Balay PLogObjectCreate(mat); 16460ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 16470ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 16480ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 16490ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 16500ac07820SSatish Balay mat->factor = matin->factor; 16510ac07820SSatish Balay mat->assembled = PETSC_TRUE; 16520ac07820SSatish Balay 16530ac07820SSatish Balay a->m = mat->m = oldmat->m; 16540ac07820SSatish Balay a->n = mat->n = oldmat->n; 16550ac07820SSatish Balay a->M = mat->M = oldmat->M; 16560ac07820SSatish Balay a->N = mat->N = oldmat->N; 16570ac07820SSatish Balay 16580ac07820SSatish Balay a->bs = oldmat->bs; 16590ac07820SSatish Balay a->bs2 = oldmat->bs2; 16600ac07820SSatish Balay a->mbs = oldmat->mbs; 16610ac07820SSatish Balay a->nbs = oldmat->nbs; 16620ac07820SSatish Balay a->Mbs = oldmat->Mbs; 16630ac07820SSatish Balay a->Nbs = oldmat->Nbs; 16640ac07820SSatish Balay 16650ac07820SSatish Balay a->rstart = oldmat->rstart; 16660ac07820SSatish Balay a->rend = oldmat->rend; 16670ac07820SSatish Balay a->cstart = oldmat->cstart; 16680ac07820SSatish Balay a->cend = oldmat->cend; 16690ac07820SSatish Balay a->size = oldmat->size; 16700ac07820SSatish Balay a->rank = oldmat->rank; 1671e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 16720ac07820SSatish Balay a->rowvalues = 0; 16730ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 167430793edcSSatish Balay a->barray = 0; 16750ac07820SSatish Balay 16760ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 16770ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 16780ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 16790ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 16800ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16810ac07820SSatish Balay if (oldmat->colmap) { 16820ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 16830ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 16840ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 16850ac07820SSatish Balay } else a->colmap = 0; 16860ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 16870ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 16880ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 16890ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 16900ac07820SSatish Balay } else a->garray = 0; 16910ac07820SSatish Balay 16920ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 16930ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 16940ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 16950ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 16960ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 16970ac07820SSatish Balay PLogObjectParent(mat,a->A); 16980ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 16990ac07820SSatish Balay PLogObjectParent(mat,a->B); 17000ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 17010ac07820SSatish Balay if (flg) { 17020ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 17030ac07820SSatish Balay } 17040ac07820SSatish Balay *newmat = mat; 17050ac07820SSatish Balay return 0; 17060ac07820SSatish Balay } 170757b952d6SSatish Balay 170857b952d6SSatish Balay #include "sys.h" 170957b952d6SSatish Balay 17105615d1e5SSatish Balay #undef __FUNC__ 17115615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 171257b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 171357b952d6SSatish Balay { 171457b952d6SSatish Balay Mat A; 171557b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 171657b952d6SSatish Balay Scalar *vals,*buf; 171757b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 171857b952d6SSatish Balay MPI_Status status; 1719cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 172057b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 172157b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 172257b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 172357b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 172457b952d6SSatish Balay 172557b952d6SSatish Balay 172657b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 172757b952d6SSatish Balay bs2 = bs*bs; 172857b952d6SSatish Balay 172957b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 173057b952d6SSatish Balay if (!rank) { 173157b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 173257b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1733e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 173457b952d6SSatish Balay } 173557b952d6SSatish Balay 173657b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 173757b952d6SSatish Balay M = header[1]; N = header[2]; 173857b952d6SSatish Balay 1739e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 174057b952d6SSatish Balay 174157b952d6SSatish Balay /* 174257b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 174357b952d6SSatish Balay divisible by the blocksize 174457b952d6SSatish Balay */ 174557b952d6SSatish Balay Mbs = M/bs; 174657b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 174757b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 174857b952d6SSatish Balay else Mbs++; 174957b952d6SSatish Balay if (extra_rows &&!rank) { 1750b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 175157b952d6SSatish Balay } 1752537820f0SBarry Smith 175357b952d6SSatish Balay /* determine ownership of all rows */ 175457b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 175557b952d6SSatish Balay m = mbs * bs; 1756cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1757cee3aa6bSSatish Balay browners = rowners + size + 1; 175857b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 175957b952d6SSatish Balay rowners[0] = 0; 1760cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1761cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 176257b952d6SSatish Balay rstart = rowners[rank]; 176357b952d6SSatish Balay rend = rowners[rank+1]; 176457b952d6SSatish Balay 176557b952d6SSatish Balay /* distribute row lengths to all processors */ 176657b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 176757b952d6SSatish Balay if (!rank) { 176857b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 176957b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 177057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 177157b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1772cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1773cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 177457b952d6SSatish Balay PetscFree(sndcounts); 177557b952d6SSatish Balay } 177657b952d6SSatish Balay else { 177757b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 177857b952d6SSatish Balay } 177957b952d6SSatish Balay 178057b952d6SSatish Balay if (!rank) { 178157b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 178257b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 178357b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 178457b952d6SSatish Balay for ( i=0; i<size; i++ ) { 178557b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 178657b952d6SSatish Balay procsnz[i] += rowlengths[j]; 178757b952d6SSatish Balay } 178857b952d6SSatish Balay } 178957b952d6SSatish Balay PetscFree(rowlengths); 179057b952d6SSatish Balay 179157b952d6SSatish Balay /* determine max buffer needed and allocate it */ 179257b952d6SSatish Balay maxnz = 0; 179357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 179457b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 179557b952d6SSatish Balay } 179657b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 179757b952d6SSatish Balay 179857b952d6SSatish Balay /* read in my part of the matrix column indices */ 179957b952d6SSatish Balay nz = procsnz[0]; 180057b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 180157b952d6SSatish Balay mycols = ibuf; 1802cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 180357b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1804cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1805cee3aa6bSSatish Balay 180657b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 180757b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 180857b952d6SSatish Balay nz = procsnz[i]; 180957b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 181057b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 181157b952d6SSatish Balay } 181257b952d6SSatish Balay /* read in the stuff for the last proc */ 181357b952d6SSatish Balay if ( size != 1 ) { 181457b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 181557b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 181657b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1817cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 181857b952d6SSatish Balay } 181957b952d6SSatish Balay PetscFree(cols); 182057b952d6SSatish Balay } 182157b952d6SSatish Balay else { 182257b952d6SSatish Balay /* determine buffer space needed for message */ 182357b952d6SSatish Balay nz = 0; 182457b952d6SSatish Balay for ( i=0; i<m; i++ ) { 182557b952d6SSatish Balay nz += locrowlens[i]; 182657b952d6SSatish Balay } 182757b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 182857b952d6SSatish Balay mycols = ibuf; 182957b952d6SSatish Balay /* receive message of column indices*/ 183057b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 183157b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1832e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 183357b952d6SSatish Balay } 183457b952d6SSatish Balay 183557b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1836cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1837cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 183857b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1839cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 184057b952d6SSatish Balay masked1 = mask + Mbs; 184157b952d6SSatish Balay masked2 = masked1 + Mbs; 184257b952d6SSatish Balay rowcount = 0; nzcount = 0; 184357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 184457b952d6SSatish Balay dcount = 0; 184557b952d6SSatish Balay odcount = 0; 184657b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 184757b952d6SSatish Balay kmax = locrowlens[rowcount]; 184857b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 184957b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 185057b952d6SSatish Balay if (!mask[tmp]) { 185157b952d6SSatish Balay mask[tmp] = 1; 185257b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 185357b952d6SSatish Balay else masked1[dcount++] = tmp; 185457b952d6SSatish Balay } 185557b952d6SSatish Balay } 185657b952d6SSatish Balay rowcount++; 185757b952d6SSatish Balay } 1858cee3aa6bSSatish Balay 185957b952d6SSatish Balay dlens[i] = dcount; 186057b952d6SSatish Balay odlens[i] = odcount; 1861cee3aa6bSSatish Balay 186257b952d6SSatish Balay /* zero out the mask elements we set */ 186357b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 186457b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 186557b952d6SSatish Balay } 1866cee3aa6bSSatish Balay 186757b952d6SSatish Balay /* create our matrix */ 1868537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1869537820f0SBarry Smith CHKERRQ(ierr); 187057b952d6SSatish Balay A = *newmat; 18716d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 187257b952d6SSatish Balay 187357b952d6SSatish Balay if (!rank) { 187457b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 187557b952d6SSatish Balay /* read in my part of the matrix numerical values */ 187657b952d6SSatish Balay nz = procsnz[0]; 187757b952d6SSatish Balay vals = buf; 1878cee3aa6bSSatish Balay mycols = ibuf; 1879cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 188057b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1881cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1882537820f0SBarry Smith 188357b952d6SSatish Balay /* insert into matrix */ 188457b952d6SSatish Balay jj = rstart*bs; 188557b952d6SSatish Balay for ( i=0; i<m; i++ ) { 188657b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 188757b952d6SSatish Balay mycols += locrowlens[i]; 188857b952d6SSatish Balay vals += locrowlens[i]; 188957b952d6SSatish Balay jj++; 189057b952d6SSatish Balay } 189157b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 189257b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 189357b952d6SSatish Balay nz = procsnz[i]; 189457b952d6SSatish Balay vals = buf; 189557b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 189657b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 189757b952d6SSatish Balay } 189857b952d6SSatish Balay /* the last proc */ 189957b952d6SSatish Balay if ( size != 1 ){ 190057b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1901cee3aa6bSSatish Balay vals = buf; 190257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 190357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1904cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 190557b952d6SSatish Balay } 190657b952d6SSatish Balay PetscFree(procsnz); 190757b952d6SSatish Balay } 190857b952d6SSatish Balay else { 190957b952d6SSatish Balay /* receive numeric values */ 191057b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 191157b952d6SSatish Balay 191257b952d6SSatish Balay /* receive message of values*/ 191357b952d6SSatish Balay vals = buf; 1914cee3aa6bSSatish Balay mycols = ibuf; 191557b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 191657b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1917e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 191857b952d6SSatish Balay 191957b952d6SSatish Balay /* insert into matrix */ 192057b952d6SSatish Balay jj = rstart*bs; 1921cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 192257b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 192357b952d6SSatish Balay mycols += locrowlens[i]; 192457b952d6SSatish Balay vals += locrowlens[i]; 192557b952d6SSatish Balay jj++; 192657b952d6SSatish Balay } 192757b952d6SSatish Balay } 192857b952d6SSatish Balay PetscFree(locrowlens); 192957b952d6SSatish Balay PetscFree(buf); 193057b952d6SSatish Balay PetscFree(ibuf); 193157b952d6SSatish Balay PetscFree(rowners); 193257b952d6SSatish Balay PetscFree(dlens); 1933cee3aa6bSSatish Balay PetscFree(mask); 19346d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19356d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 193657b952d6SSatish Balay return 0; 193757b952d6SSatish Balay } 193857b952d6SSatish Balay 193957b952d6SSatish Balay 1940