179bdfe76SSatish Balay #ifndef lint 2*ac7a638eSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.60 1997/03/27 20:43:27 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2257b952d6SSatish Balay 233b2fbd54SBarry Smith 24537820f0SBarry Smith /* 25537820f0SBarry Smith Local utility routine that creates a mapping from the global column 2657b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2757b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2857b952d6SSatish Balay length of colmap equals the global matrix length. 2957b952d6SSatish Balay */ 305615d1e5SSatish Balay #undef __FUNC__ 315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3357b952d6SSatish Balay { 3457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3557b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 36928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 3757b952d6SSatish Balay 3857b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 3957b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 4057b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 41928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 4257b952d6SSatish Balay return 0; 4357b952d6SSatish Balay } 4457b952d6SSatish Balay 455615d1e5SSatish Balay #undef __FUNC__ 465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ(" 473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 483b2fbd54SBarry Smith PetscTruth *done) 4957b952d6SSatish Balay { 503b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 5157b952d6SSatish Balay int ierr; 523b2fbd54SBarry Smith if (aij->size == 1) { 533b2fbd54SBarry Smith ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 553b2fbd54SBarry Smith return 0; 563b2fbd54SBarry Smith } 573b2fbd54SBarry Smith 585615d1e5SSatish Balay #undef __FUNC__ 595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ" 603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 613b2fbd54SBarry Smith PetscTruth *done) 623b2fbd54SBarry Smith { 633b2fbd54SBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data; 643b2fbd54SBarry Smith int ierr; 653b2fbd54SBarry Smith if (aij->size == 1) { 663b2fbd54SBarry Smith ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 67e3372554SBarry Smith } else SETERRQ(1,0,"not supported in parallel"); 6857b952d6SSatish Balay return 0; 6957b952d6SSatish Balay } 7080c1aa95SSatish Balay #define CHUNKSIZE 10 7180c1aa95SSatish Balay 72f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 7380c1aa95SSatish Balay { \ 7480c1aa95SSatish Balay \ 7580c1aa95SSatish Balay brow = row/bs; \ 7680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 77*ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 7880c1aa95SSatish Balay bcol = col/bs; \ 7980c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 80ab26458aSBarry Smith low = 0; high = nrow; \ 81ab26458aSBarry Smith while (high-low > 3) { \ 82ab26458aSBarry Smith t = (low+high)/2; \ 83ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 84ab26458aSBarry Smith else low = t; \ 85ab26458aSBarry Smith } \ 86ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 8780c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 8880c1aa95SSatish Balay if (rp[_i] == bcol) { \ 8980c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 90eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 91eada6651SSatish Balay else *bap = value; \ 92*ac7a638eSSatish Balay goto a_noinsert; \ 9380c1aa95SSatish Balay } \ 9480c1aa95SSatish Balay } \ 95*ac7a638eSSatish Balay if (a->nonew) goto a_noinsert; \ 9680c1aa95SSatish Balay if (nrow >= rmax) { \ 9780c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 9880c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 9980c1aa95SSatish Balay Scalar *new_a; \ 10080c1aa95SSatish Balay \ 10180c1aa95SSatish Balay /* malloc new storage space */ \ 10280c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 10380c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 10480c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 10580c1aa95SSatish Balay new_i = new_j + new_nz; \ 10680c1aa95SSatish Balay \ 10780c1aa95SSatish Balay /* copy over old data into new slots */ \ 10880c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 10980c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 11080c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 11180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 11280c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 11380c1aa95SSatish Balay len*sizeof(int)); \ 11480c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 11580c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 11680c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 11780c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 11880c1aa95SSatish Balay /* free up old matrix storage */ \ 11980c1aa95SSatish Balay PetscFree(a->a); \ 12080c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 12180c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 12280c1aa95SSatish Balay a->singlemalloc = 1; \ 12380c1aa95SSatish Balay \ 12480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 125*ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 12680c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 12780c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 12880c1aa95SSatish Balay a->reallocs++; \ 12980c1aa95SSatish Balay a->nz++; \ 13080c1aa95SSatish Balay } \ 13180c1aa95SSatish Balay N = nrow++ - 1; \ 13280c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 13380c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 13480c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 13580c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 13680c1aa95SSatish Balay } \ 13780c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 13880c1aa95SSatish Balay rp[_i] = bcol; \ 13980c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 140*ac7a638eSSatish Balay a_noinsert:; \ 14180c1aa95SSatish Balay ailen[brow] = nrow; \ 14280c1aa95SSatish Balay } 14357b952d6SSatish Balay 144*ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 145*ac7a638eSSatish Balay { \ 146*ac7a638eSSatish Balay \ 147*ac7a638eSSatish Balay brow = row/bs; \ 148*ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 149*ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 150*ac7a638eSSatish Balay bcol = col/bs; \ 151*ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 152*ac7a638eSSatish Balay low = 0; high = nrow; \ 153*ac7a638eSSatish Balay while (high-low > 3) { \ 154*ac7a638eSSatish Balay t = (low+high)/2; \ 155*ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 156*ac7a638eSSatish Balay else low = t; \ 157*ac7a638eSSatish Balay } \ 158*ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 159*ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 160*ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 161*ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 162*ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 163*ac7a638eSSatish Balay else *bap = value; \ 164*ac7a638eSSatish Balay goto b_noinsert; \ 165*ac7a638eSSatish Balay } \ 166*ac7a638eSSatish Balay } \ 167*ac7a638eSSatish Balay if (a->nonew) goto b_noinsert; \ 168*ac7a638eSSatish Balay if (nrow >= rmax) { \ 169*ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 170*ac7a638eSSatish Balay int new_nz = bi[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 171*ac7a638eSSatish Balay Scalar *new_a; \ 172*ac7a638eSSatish Balay \ 173*ac7a638eSSatish Balay /* malloc new storage space */ \ 174*ac7a638eSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 175*ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 176*ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 177*ac7a638eSSatish Balay new_i = new_j + new_nz; \ 178*ac7a638eSSatish Balay \ 179*ac7a638eSSatish Balay /* copy over old data into new slots */ \ 180*ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 181*ac7a638eSSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 182*ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 183*ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 184*ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 185*ac7a638eSSatish Balay len*sizeof(int)); \ 186*ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 187*ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 188*ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 189*ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 190*ac7a638eSSatish Balay /* free up old matrix storage */ \ 191*ac7a638eSSatish Balay PetscFree(a->a); \ 192*ac7a638eSSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 193*ac7a638eSSatish Balay ba = a->a = new_a; bi = a->i = new_i; bj = a->j = new_j; \ 194*ac7a638eSSatish Balay a->singlemalloc = 1; \ 195*ac7a638eSSatish Balay \ 196*ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 197*ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 198*ac7a638eSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 199*ac7a638eSSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 200*ac7a638eSSatish Balay a->reallocs++; \ 201*ac7a638eSSatish Balay a->nz++; \ 202*ac7a638eSSatish Balay } \ 203*ac7a638eSSatish Balay N = nrow++ - 1; \ 204*ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 205*ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 206*ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 207*ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 208*ac7a638eSSatish Balay } \ 209*ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 210*ac7a638eSSatish Balay rp[_i] = bcol; \ 211*ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 212*ac7a638eSSatish Balay b_noinsert:; \ 213*ac7a638eSSatish Balay bilen[brow] = nrow; \ 214*ac7a638eSSatish Balay } 215*ac7a638eSSatish Balay 2165615d1e5SSatish Balay #undef __FUNC__ 2175615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 218ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 21957b952d6SSatish Balay { 22057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 22157b952d6SSatish Balay Scalar value; 2224fa0d573SSatish Balay int ierr,i,j,row,col; 2234fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2244fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2254fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 22657b952d6SSatish Balay 227eada6651SSatish Balay /* Some Variables required in the macro */ 22880c1aa95SSatish Balay Mat A = baij->A; 22980c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 230*ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 231*ac7a638eSSatish Balay Scalar *aa=a->a; 232*ac7a638eSSatish Balay 233*ac7a638eSSatish Balay Mat B = baij->B; 234*ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 235*ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 236*ac7a638eSSatish Balay Scalar *ba=b->a; 237*ac7a638eSSatish Balay 238*ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 239ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 240*ac7a638eSSatish Balay Scalar *ap,*bap; 24180c1aa95SSatish Balay 24257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 243639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 244e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 245e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 246639f9d9dSBarry Smith #endif 24757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 24857b952d6SSatish Balay row = im[i] - rstart_orig; 24957b952d6SSatish Balay for ( j=0; j<n; j++ ) { 25057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 25157b952d6SSatish Balay col = in[j] - cstart_orig; 25257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 253f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 25480c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25557b952d6SSatish Balay } 256639f9d9dSBarry Smith #if defined(PETSC_BOPT_g) 257e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 258e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 259639f9d9dSBarry Smith #endif 26057b952d6SSatish Balay else { 26157b952d6SSatish Balay if (mat->was_assembled) { 262905e6a2fSBarry Smith if (!baij->colmap) { 263905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 264905e6a2fSBarry Smith } 265905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 26657b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 26757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 26857b952d6SSatish Balay col = in[j]; 26957b952d6SSatish Balay } 27057b952d6SSatish Balay } 27157b952d6SSatish Balay else col = in[j]; 27257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 273*ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 274*ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 27557b952d6SSatish Balay } 27657b952d6SSatish Balay } 27757b952d6SSatish Balay } 27857b952d6SSatish Balay else { 27990f02eecSBarry Smith if (roworiented && !baij->donotstash) { 28057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 28157b952d6SSatish Balay } 28257b952d6SSatish Balay else { 28390f02eecSBarry Smith if (!baij->donotstash) { 28457b952d6SSatish Balay row = im[i]; 28557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 28657b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 28757b952d6SSatish Balay } 28857b952d6SSatish Balay } 28957b952d6SSatish Balay } 29057b952d6SSatish Balay } 29190f02eecSBarry Smith } 29257b952d6SSatish Balay return 0; 29357b952d6SSatish Balay } 29457b952d6SSatish Balay 295ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 296ab26458aSBarry Smith #undef __FUNC__ 297ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 298ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 299ab26458aSBarry Smith { 300ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 30130793edcSSatish Balay Scalar *value,*barray=baij->barray; 302abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 303ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 304ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 305ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 306ab26458aSBarry Smith 30730793edcSSatish Balay 30830793edcSSatish Balay if(!barray) { 30930793edcSSatish Balay barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 31030793edcSSatish Balay baij->barray = barray; 31130793edcSSatish Balay } 31230793edcSSatish Balay 313ab26458aSBarry Smith if (roworiented) { 314ab26458aSBarry Smith stepval = (n-1)*bs; 315ab26458aSBarry Smith } else { 316ab26458aSBarry Smith stepval = (m-1)*bs; 317ab26458aSBarry Smith } 318ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 319ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 320ab26458aSBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 321ab26458aSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 322ab26458aSBarry Smith #endif 323ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 324ab26458aSBarry Smith row = im[i] - rstart; 325ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 326ab26458aSBarry Smith if (roworiented) { 327ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 328ab26458aSBarry Smith } else { 329ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 330abef11f7SSatish Balay } 331ab26458aSBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) 332ab26458aSBarry Smith for (jj=0; jj<bs; jj++ ) 33330793edcSSatish Balay *barray++ = *value++; 33430793edcSSatish Balay barray -=bs2; 335abef11f7SSatish Balay 336abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 337abef11f7SSatish Balay col = in[j] - cstart; 33830793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 339ab26458aSBarry Smith } 340ab26458aSBarry Smith #if defined(PETSC_BOPT_g) 341ab26458aSBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 342ab26458aSBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");} 343ab26458aSBarry Smith #endif 344ab26458aSBarry Smith else { 345ab26458aSBarry Smith if (mat->was_assembled) { 346ab26458aSBarry Smith if (!baij->colmap) { 347ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 348ab26458aSBarry Smith } 349ab26458aSBarry Smith col = baij->colmap[in[j]] - 1; 350ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 351ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 352ab26458aSBarry Smith col = in[j]; 353ab26458aSBarry Smith } 354ab26458aSBarry Smith } 355ab26458aSBarry Smith else col = in[j]; 35630793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 357ab26458aSBarry Smith } 358ab26458aSBarry Smith } 359ab26458aSBarry Smith } 360ab26458aSBarry Smith else { 361ab26458aSBarry Smith if (!baij->donotstash) { 362ab26458aSBarry Smith if (roworiented ) { 363abef11f7SSatish Balay row = im[i]*bs; 364abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 365abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 366abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 367abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 368abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 369abef11f7SSatish Balay } 370ab26458aSBarry Smith } 371ab26458aSBarry Smith } 372ab26458aSBarry Smith } 373ab26458aSBarry Smith else { 374ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 375abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 376abef11f7SSatish Balay col = in[j]*bs; 377abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 378abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 379abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 380ab26458aSBarry Smith } 381ab26458aSBarry Smith } 382ab26458aSBarry Smith } 383abef11f7SSatish Balay 384abef11f7SSatish Balay } 385abef11f7SSatish Balay } 386ab26458aSBarry Smith } 387ab26458aSBarry Smith } 388ab26458aSBarry Smith return 0; 389ab26458aSBarry Smith } 390ab26458aSBarry Smith 3915615d1e5SSatish Balay #undef __FUNC__ 3925615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 393ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 394d6de1c52SSatish Balay { 395d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 396d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 397d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 398d6de1c52SSatish Balay 399d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 400e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 401e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 402d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 403d6de1c52SSatish Balay row = idxm[i] - bsrstart; 404d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 405e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 406e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 407d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 408d6de1c52SSatish Balay col = idxn[j] - bscstart; 409d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 410d6de1c52SSatish Balay } 411d6de1c52SSatish Balay else { 412905e6a2fSBarry Smith if (!baij->colmap) { 413905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 414905e6a2fSBarry Smith } 415e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 416dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 417d9d09a02SSatish Balay else { 418dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 419d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 420d6de1c52SSatish Balay } 421d6de1c52SSatish Balay } 422d6de1c52SSatish Balay } 423d9d09a02SSatish Balay } 424d6de1c52SSatish Balay else { 425e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 426d6de1c52SSatish Balay } 427d6de1c52SSatish Balay } 428d6de1c52SSatish Balay return 0; 429d6de1c52SSatish Balay } 430d6de1c52SSatish Balay 4315615d1e5SSatish Balay #undef __FUNC__ 4325615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 433ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 434d6de1c52SSatish Balay { 435d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 436d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 437acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 438d6de1c52SSatish Balay double sum = 0.0; 439d6de1c52SSatish Balay Scalar *v; 440d6de1c52SSatish Balay 441d6de1c52SSatish Balay if (baij->size == 1) { 442d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 443d6de1c52SSatish Balay } else { 444d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 445d6de1c52SSatish Balay v = amat->a; 446d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 447d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 448d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 449d6de1c52SSatish Balay #else 450d6de1c52SSatish Balay sum += (*v)*(*v); v++; 451d6de1c52SSatish Balay #endif 452d6de1c52SSatish Balay } 453d6de1c52SSatish Balay v = bmat->a; 454d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 455d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 456d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 457d6de1c52SSatish Balay #else 458d6de1c52SSatish Balay sum += (*v)*(*v); v++; 459d6de1c52SSatish Balay #endif 460d6de1c52SSatish Balay } 461d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 462d6de1c52SSatish Balay *norm = sqrt(*norm); 463d6de1c52SSatish Balay } 464acdf5bf4SSatish Balay else 465e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 466d6de1c52SSatish Balay } 467d6de1c52SSatish Balay return 0; 468d6de1c52SSatish Balay } 46957b952d6SSatish Balay 4705615d1e5SSatish Balay #undef __FUNC__ 4715615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 472ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 47357b952d6SSatish Balay { 47457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 47557b952d6SSatish Balay MPI_Comm comm = mat->comm; 47657b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 47757b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 47857b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 47957b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 48057b952d6SSatish Balay InsertMode addv; 48157b952d6SSatish Balay Scalar *rvalues,*svalues; 48257b952d6SSatish Balay 48357b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 484e0fa3b82SLois Curfman McInnes MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 48557b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 486e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 48757b952d6SSatish Balay } 488e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 48957b952d6SSatish Balay 49057b952d6SSatish Balay /* first count number of contributors to each processor */ 49157b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 49257b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 49357b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 49457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 49557b952d6SSatish Balay idx = baij->stash.idx[i]; 49657b952d6SSatish Balay for ( j=0; j<size; j++ ) { 49757b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 49857b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 49957b952d6SSatish Balay } 50057b952d6SSatish Balay } 50157b952d6SSatish Balay } 50257b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 50357b952d6SSatish Balay 50457b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 50557b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 50657b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 50757b952d6SSatish Balay nreceives = work[rank]; 50857b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 50957b952d6SSatish Balay nmax = work[rank]; 51057b952d6SSatish Balay PetscFree(work); 51157b952d6SSatish Balay 51257b952d6SSatish Balay /* post receives: 51357b952d6SSatish Balay 1) each message will consist of ordered pairs 51457b952d6SSatish Balay (global index,value) we store the global index as a double 51557b952d6SSatish Balay to simplify the message passing. 51657b952d6SSatish Balay 2) since we don't know how long each individual message is we 51757b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 51857b952d6SSatish Balay this is a lot of wasted space. 51957b952d6SSatish Balay 52057b952d6SSatish Balay 52157b952d6SSatish Balay This could be done better. 52257b952d6SSatish Balay */ 52357b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 52457b952d6SSatish Balay CHKPTRQ(rvalues); 52557b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 52657b952d6SSatish Balay CHKPTRQ(recv_waits); 52757b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 52857b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 52957b952d6SSatish Balay comm,recv_waits+i); 53057b952d6SSatish Balay } 53157b952d6SSatish Balay 53257b952d6SSatish Balay /* do sends: 53357b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 53457b952d6SSatish Balay the ith processor 53557b952d6SSatish Balay */ 53657b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 53757b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 53857b952d6SSatish Balay CHKPTRQ(send_waits); 53957b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 54057b952d6SSatish Balay starts[0] = 0; 54157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 54257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 54357b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 54457b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 54557b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 54657b952d6SSatish Balay } 54757b952d6SSatish Balay PetscFree(owner); 54857b952d6SSatish Balay starts[0] = 0; 54957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 55057b952d6SSatish Balay count = 0; 55157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 55257b952d6SSatish Balay if (procs[i]) { 55357b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 55457b952d6SSatish Balay comm,send_waits+count++); 55557b952d6SSatish Balay } 55657b952d6SSatish Balay } 55757b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 55857b952d6SSatish Balay 55957b952d6SSatish Balay /* Free cache space */ 560d2dc9b81SLois Curfman McInnes PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 56157b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 56257b952d6SSatish Balay 56357b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 56457b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 56557b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 56657b952d6SSatish Balay baij->rmax = nmax; 56757b952d6SSatish Balay 56857b952d6SSatish Balay return 0; 56957b952d6SSatish Balay } 57057b952d6SSatish Balay 57157b952d6SSatish Balay 5725615d1e5SSatish Balay #undef __FUNC__ 5735615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 574ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 57557b952d6SSatish Balay { 57657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 57757b952d6SSatish Balay MPI_Status *send_status,recv_status; 57857b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 57957b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 58057b952d6SSatish Balay Scalar *values,val; 581e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 58257b952d6SSatish Balay 58357b952d6SSatish Balay /* wait on receives */ 58457b952d6SSatish Balay while (count) { 58557b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 58657b952d6SSatish Balay /* unpack receives into our local space */ 58757b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 58857b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 58957b952d6SSatish Balay n = n/3; 59057b952d6SSatish Balay for ( i=0; i<n; i++ ) { 59157b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 59257b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 59357b952d6SSatish Balay val = values[3*i+2]; 59457b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 59557b952d6SSatish Balay col -= baij->cstart*bs; 59657b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 59757b952d6SSatish Balay } 59857b952d6SSatish Balay else { 59957b952d6SSatish Balay if (mat->was_assembled) { 600905e6a2fSBarry Smith if (!baij->colmap) { 601905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 602905e6a2fSBarry Smith } 603905e6a2fSBarry Smith col = (baij->colmap[col/bs]-1)*bs + col%bs; 60457b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 60557b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 60657b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 60757b952d6SSatish Balay } 60857b952d6SSatish Balay } 60957b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 61057b952d6SSatish Balay } 61157b952d6SSatish Balay } 61257b952d6SSatish Balay count--; 61357b952d6SSatish Balay } 61457b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 61557b952d6SSatish Balay 61657b952d6SSatish Balay /* wait on sends */ 61757b952d6SSatish Balay if (baij->nsends) { 61857b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 61957b952d6SSatish Balay CHKPTRQ(send_status); 62057b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 62157b952d6SSatish Balay PetscFree(send_status); 62257b952d6SSatish Balay } 62357b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 62457b952d6SSatish Balay 62557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 62657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 62757b952d6SSatish Balay 62857b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 62957b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 63057b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 63157b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 63257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 63357b952d6SSatish Balay } 63457b952d6SSatish Balay 6356d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 63657b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 63757b952d6SSatish Balay } 63857b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 63957b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 64057b952d6SSatish Balay 64157b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 64257b952d6SSatish Balay return 0; 64357b952d6SSatish Balay } 64457b952d6SSatish Balay 6455615d1e5SSatish Balay #undef __FUNC__ 6465615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 64757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 64857b952d6SSatish Balay { 64957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 65057b952d6SSatish Balay int ierr; 65157b952d6SSatish Balay 65257b952d6SSatish Balay if (baij->size == 1) { 65357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 65457b952d6SSatish Balay } 655e3372554SBarry Smith else SETERRQ(1,0,"Only uniprocessor output supported"); 65657b952d6SSatish Balay return 0; 65757b952d6SSatish Balay } 65857b952d6SSatish Balay 6595615d1e5SSatish Balay #undef __FUNC__ 6605615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 66157b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 66257b952d6SSatish Balay { 66357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 664cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 66557b952d6SSatish Balay FILE *fd; 66657b952d6SSatish Balay ViewerType vtype; 66757b952d6SSatish Balay 66857b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 66957b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 67057b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 671639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 6724e220ebcSLois Curfman McInnes MatInfo info; 67357b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 67457b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 6754e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 67657b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 67757b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 6784e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 6794e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 6804e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 6814e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 6824e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 6834e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 68457b952d6SSatish Balay fflush(fd); 68557b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 68657b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 68757b952d6SSatish Balay return 0; 68857b952d6SSatish Balay } 689639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_INFO) { 690bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 69157b952d6SSatish Balay return 0; 69257b952d6SSatish Balay } 69357b952d6SSatish Balay } 69457b952d6SSatish Balay 69557b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 69657b952d6SSatish Balay Draw draw; 69757b952d6SSatish Balay PetscTruth isnull; 69857b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 69957b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 70057b952d6SSatish Balay } 70157b952d6SSatish Balay 70257b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 70357b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 70457b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 70557b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 70657b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 70757b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 70857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 70957b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 71057b952d6SSatish Balay fflush(fd); 71157b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 71257b952d6SSatish Balay } 71357b952d6SSatish Balay else { 71457b952d6SSatish Balay int size = baij->size; 71557b952d6SSatish Balay rank = baij->rank; 71657b952d6SSatish Balay if (size == 1) { 71757b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 71857b952d6SSatish Balay } 71957b952d6SSatish Balay else { 72057b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 72157b952d6SSatish Balay Mat A; 72257b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 72357b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 72457b952d6SSatish Balay int mbs=baij->mbs; 72557b952d6SSatish Balay Scalar *a; 72657b952d6SSatish Balay 72757b952d6SSatish Balay if (!rank) { 728cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 72957b952d6SSatish Balay CHKERRQ(ierr); 73057b952d6SSatish Balay } 73157b952d6SSatish Balay else { 732cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 73357b952d6SSatish Balay CHKERRQ(ierr); 73457b952d6SSatish Balay } 73557b952d6SSatish Balay PLogObjectParent(mat,A); 73657b952d6SSatish Balay 73757b952d6SSatish Balay /* copy over the A part */ 73857b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 73957b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 74057b952d6SSatish Balay row = baij->rstart; 74157b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 74257b952d6SSatish Balay 74357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 74457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 74557b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 74657b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 74757b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 74857b952d6SSatish Balay for (k=0; k<bs; k++ ) { 749cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 750cee3aa6bSSatish Balay col++; a += bs; 75157b952d6SSatish Balay } 75257b952d6SSatish Balay } 75357b952d6SSatish Balay } 75457b952d6SSatish Balay /* copy over the B part */ 75557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 75657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 75757b952d6SSatish Balay row = baij->rstart*bs; 75857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 75957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 76057b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 76157b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 76257b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 76357b952d6SSatish Balay for (k=0; k<bs; k++ ) { 764cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 765cee3aa6bSSatish Balay col++; a += bs; 76657b952d6SSatish Balay } 76757b952d6SSatish Balay } 76857b952d6SSatish Balay } 76957b952d6SSatish Balay PetscFree(rvals); 7706d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7716d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 77257b952d6SSatish Balay if (!rank) { 77357b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 77457b952d6SSatish Balay } 77557b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 77657b952d6SSatish Balay } 77757b952d6SSatish Balay } 77857b952d6SSatish Balay return 0; 77957b952d6SSatish Balay } 78057b952d6SSatish Balay 78157b952d6SSatish Balay 78257b952d6SSatish Balay 7835615d1e5SSatish Balay #undef __FUNC__ 7845615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 785ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 78657b952d6SSatish Balay { 78757b952d6SSatish Balay Mat mat = (Mat) obj; 78857b952d6SSatish Balay int ierr; 78957b952d6SSatish Balay ViewerType vtype; 79057b952d6SSatish Balay 79157b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 79257b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 79357b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 79457b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 79557b952d6SSatish Balay } 79657b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 79757b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 79857b952d6SSatish Balay } 79957b952d6SSatish Balay return 0; 80057b952d6SSatish Balay } 80157b952d6SSatish Balay 8025615d1e5SSatish Balay #undef __FUNC__ 8035615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 804ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 80579bdfe76SSatish Balay { 80679bdfe76SSatish Balay Mat mat = (Mat) obj; 80779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 80879bdfe76SSatish Balay int ierr; 80979bdfe76SSatish Balay 81079bdfe76SSatish Balay #if defined(PETSC_LOG) 81179bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 81279bdfe76SSatish Balay #endif 81379bdfe76SSatish Balay 81479bdfe76SSatish Balay PetscFree(baij->rowners); 81579bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 81679bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 81779bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 81879bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 81979bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 82079bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 82179bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 82230793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 82379bdfe76SSatish Balay PetscFree(baij); 82490f02eecSBarry Smith if (mat->mapping) { 82590f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 82690f02eecSBarry Smith } 82779bdfe76SSatish Balay PLogObjectDestroy(mat); 82879bdfe76SSatish Balay PetscHeaderDestroy(mat); 82979bdfe76SSatish Balay return 0; 83079bdfe76SSatish Balay } 83179bdfe76SSatish Balay 8325615d1e5SSatish Balay #undef __FUNC__ 8335615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 834ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 835cee3aa6bSSatish Balay { 836cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 83747b4a8eaSLois Curfman McInnes int ierr, nt; 838cee3aa6bSSatish Balay 839c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 84047b4a8eaSLois Curfman McInnes if (nt != a->n) { 841ab26458aSBarry Smith SETERRQ(1,0,"Incompatible partition of A and xx"); 84247b4a8eaSLois Curfman McInnes } 843c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 84447b4a8eaSLois Curfman McInnes if (nt != a->m) { 845e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 84647b4a8eaSLois Curfman McInnes } 84743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 848cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 84943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 850cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 85143a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 852cee3aa6bSSatish Balay return 0; 853cee3aa6bSSatish Balay } 854cee3aa6bSSatish Balay 8555615d1e5SSatish Balay #undef __FUNC__ 8565615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 857ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 858cee3aa6bSSatish Balay { 859cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 860cee3aa6bSSatish Balay int ierr; 86143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 862cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 86343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 864cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 865cee3aa6bSSatish Balay return 0; 866cee3aa6bSSatish Balay } 867cee3aa6bSSatish Balay 8685615d1e5SSatish Balay #undef __FUNC__ 8695615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 870ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 871cee3aa6bSSatish Balay { 872cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 873cee3aa6bSSatish Balay int ierr; 874cee3aa6bSSatish Balay 875cee3aa6bSSatish Balay /* do nondiagonal part */ 876cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 877cee3aa6bSSatish Balay /* send it on its way */ 878537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 879cee3aa6bSSatish Balay /* do local part */ 880cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 881cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 882cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 883cee3aa6bSSatish Balay /* but is not perhaps always true. */ 884639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 885cee3aa6bSSatish Balay return 0; 886cee3aa6bSSatish Balay } 887cee3aa6bSSatish Balay 8885615d1e5SSatish Balay #undef __FUNC__ 8895615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 890ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 891cee3aa6bSSatish Balay { 892cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 893cee3aa6bSSatish Balay int ierr; 894cee3aa6bSSatish Balay 895cee3aa6bSSatish Balay /* do nondiagonal part */ 896cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 897cee3aa6bSSatish Balay /* send it on its way */ 898537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 899cee3aa6bSSatish Balay /* do local part */ 900cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 901cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 902cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 903cee3aa6bSSatish Balay /* but is not perhaps always true. */ 904537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 905cee3aa6bSSatish Balay return 0; 906cee3aa6bSSatish Balay } 907cee3aa6bSSatish Balay 908cee3aa6bSSatish Balay /* 909cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 910cee3aa6bSSatish Balay diagonal block 911cee3aa6bSSatish Balay */ 9125615d1e5SSatish Balay #undef __FUNC__ 9135615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 914ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 915cee3aa6bSSatish Balay { 916cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 917cee3aa6bSSatish Balay if (a->M != a->N) 918e3372554SBarry Smith SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 919cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 920cee3aa6bSSatish Balay } 921cee3aa6bSSatish Balay 9225615d1e5SSatish Balay #undef __FUNC__ 9235615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 924ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 925cee3aa6bSSatish Balay { 926cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 927cee3aa6bSSatish Balay int ierr; 928cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 929cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 930cee3aa6bSSatish Balay return 0; 931cee3aa6bSSatish Balay } 932026e39d0SSatish Balay 9335615d1e5SSatish Balay #undef __FUNC__ 9345615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 935ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 93657b952d6SSatish Balay { 93757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 93857b952d6SSatish Balay *m = mat->M; *n = mat->N; 93957b952d6SSatish Balay return 0; 94057b952d6SSatish Balay } 94157b952d6SSatish Balay 9425615d1e5SSatish Balay #undef __FUNC__ 9435615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 944ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 94557b952d6SSatish Balay { 94657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 94757b952d6SSatish Balay *m = mat->m; *n = mat->N; 94857b952d6SSatish Balay return 0; 94957b952d6SSatish Balay } 95057b952d6SSatish Balay 9515615d1e5SSatish Balay #undef __FUNC__ 9525615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 953ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 95457b952d6SSatish Balay { 95557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 95657b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 95757b952d6SSatish Balay return 0; 95857b952d6SSatish Balay } 95957b952d6SSatish Balay 960acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 961acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 962acdf5bf4SSatish Balay 9635615d1e5SSatish Balay #undef __FUNC__ 9645615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 965acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 966acdf5bf4SSatish Balay { 967acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 968acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 969acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 970d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 971d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 972acdf5bf4SSatish Balay 973e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 974acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 975acdf5bf4SSatish Balay 976acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 977acdf5bf4SSatish Balay /* 978acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 979acdf5bf4SSatish Balay */ 980acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 981bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 982bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 983acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 984acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 985acdf5bf4SSatish Balay } 986acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 987acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 988acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 989acdf5bf4SSatish Balay } 990acdf5bf4SSatish Balay 991acdf5bf4SSatish Balay 992e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 993d9d09a02SSatish Balay lrow = row - brstart; 994acdf5bf4SSatish Balay 995acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 996acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 997acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 998acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 999acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1000acdf5bf4SSatish Balay nztot = nzA + nzB; 1001acdf5bf4SSatish Balay 1002acdf5bf4SSatish Balay cmap = mat->garray; 1003acdf5bf4SSatish Balay if (v || idx) { 1004acdf5bf4SSatish Balay if (nztot) { 1005acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1006acdf5bf4SSatish Balay int imark = -1; 1007acdf5bf4SSatish Balay if (v) { 1008acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1009acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1010d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1011acdf5bf4SSatish Balay else break; 1012acdf5bf4SSatish Balay } 1013acdf5bf4SSatish Balay imark = i; 1014acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1015acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1016acdf5bf4SSatish Balay } 1017acdf5bf4SSatish Balay if (idx) { 1018acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1019acdf5bf4SSatish Balay if (imark > -1) { 1020acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1021bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1022acdf5bf4SSatish Balay } 1023acdf5bf4SSatish Balay } else { 1024acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1025d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1026d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1027acdf5bf4SSatish Balay else break; 1028acdf5bf4SSatish Balay } 1029acdf5bf4SSatish Balay imark = i; 1030acdf5bf4SSatish Balay } 1031d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1032d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1033acdf5bf4SSatish Balay } 1034acdf5bf4SSatish Balay } 1035d212a18eSSatish Balay else { 1036d212a18eSSatish Balay if (idx) *idx = 0; 1037d212a18eSSatish Balay if (v) *v = 0; 1038d212a18eSSatish Balay } 1039acdf5bf4SSatish Balay } 1040acdf5bf4SSatish Balay *nz = nztot; 1041acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1042acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1043acdf5bf4SSatish Balay return 0; 1044acdf5bf4SSatish Balay } 1045acdf5bf4SSatish Balay 10465615d1e5SSatish Balay #undef __FUNC__ 10475615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1048acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1049acdf5bf4SSatish Balay { 1050acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1051acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1052e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 1053acdf5bf4SSatish Balay } 1054acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 1055acdf5bf4SSatish Balay return 0; 1056acdf5bf4SSatish Balay } 1057acdf5bf4SSatish Balay 10585615d1e5SSatish Balay #undef __FUNC__ 10595615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1060ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 10615a838052SSatish Balay { 10625a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 10635a838052SSatish Balay *bs = baij->bs; 10645a838052SSatish Balay return 0; 10655a838052SSatish Balay } 10665a838052SSatish Balay 10675615d1e5SSatish Balay #undef __FUNC__ 10685615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1069ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 107058667388SSatish Balay { 107158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 107258667388SSatish Balay int ierr; 107358667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 107458667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 107558667388SSatish Balay return 0; 107658667388SSatish Balay } 10770ac07820SSatish Balay 10785615d1e5SSatish Balay #undef __FUNC__ 10795615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1080ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 10810ac07820SSatish Balay { 10824e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 10834e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 10847d57db60SLois Curfman McInnes int ierr; 10857d57db60SLois Curfman McInnes double isend[5], irecv[5]; 10860ac07820SSatish Balay 10874e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 10884e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 10894e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 10904e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 10914e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 10924e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 10934e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 10944e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 10954e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 10960ac07820SSatish Balay if (flag == MAT_LOCAL) { 10974e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 10984e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 10994e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11004e220ebcSLois Curfman McInnes info->memory = isend[3]; 11014e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 11020ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1103dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm); 11044e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11054e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11064e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11074e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11084e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11090ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1110dd2c0978SLois Curfman McInnes MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm); 11114e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11124e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11134e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11144e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11154e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11160ac07820SSatish Balay } 11174e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 11184e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 11194e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 11200ac07820SSatish Balay return 0; 11210ac07820SSatish Balay } 11220ac07820SSatish Balay 11235615d1e5SSatish Balay #undef __FUNC__ 11245615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1125ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 112658667388SSatish Balay { 112758667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 112858667388SSatish Balay 112958667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 113058667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 11316da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1132c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 1133c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1134b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1135b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1136b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1137aeafbbfcSLois Curfman McInnes a->roworiented = 1; 113858667388SSatish Balay MatSetOption(a->A,op); 113958667388SSatish Balay MatSetOption(a->B,op); 1140b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 11416da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 114258667388SSatish Balay op == MAT_SYMMETRIC || 114358667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 114458667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 114558667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 114658667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 114758667388SSatish Balay a->roworiented = 0; 114858667388SSatish Balay MatSetOption(a->A,op); 114958667388SSatish Balay MatSetOption(a->B,op); 115090f02eecSBarry Smith } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 115190f02eecSBarry Smith a->donotstash = 1; 115290f02eecSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) 1153e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 115458667388SSatish Balay else 1155e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 115658667388SSatish Balay return 0; 115758667388SSatish Balay } 115858667388SSatish Balay 11595615d1e5SSatish Balay #undef __FUNC__ 11605615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1161ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 11620ac07820SSatish Balay { 11630ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 11640ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 11650ac07820SSatish Balay Mat B; 11660ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 11670ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 11680ac07820SSatish Balay Scalar *a; 11690ac07820SSatish Balay 11700ac07820SSatish Balay if (matout == PETSC_NULL && M != N) 1171e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 11720ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 11730ac07820SSatish Balay CHKERRQ(ierr); 11740ac07820SSatish Balay 11750ac07820SSatish Balay /* copy over the A part */ 11760ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 11770ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 11780ac07820SSatish Balay row = baij->rstart; 11790ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 11800ac07820SSatish Balay 11810ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 11820ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 11830ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 11840ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 11850ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 11860ac07820SSatish Balay for (k=0; k<bs; k++ ) { 11870ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 11880ac07820SSatish Balay col++; a += bs; 11890ac07820SSatish Balay } 11900ac07820SSatish Balay } 11910ac07820SSatish Balay } 11920ac07820SSatish Balay /* copy over the B part */ 11930ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 11940ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 11950ac07820SSatish Balay row = baij->rstart*bs; 11960ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 11970ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 11980ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 11990ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12000ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 12010ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12020ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12030ac07820SSatish Balay col++; a += bs; 12040ac07820SSatish Balay } 12050ac07820SSatish Balay } 12060ac07820SSatish Balay } 12070ac07820SSatish Balay PetscFree(rvals); 12080ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12090ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12100ac07820SSatish Balay 12110ac07820SSatish Balay if (matout != PETSC_NULL) { 12120ac07820SSatish Balay *matout = B; 12130ac07820SSatish Balay } else { 12140ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 12150ac07820SSatish Balay PetscFree(baij->rowners); 12160ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 12170ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 12180ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 12190ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 12200ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 12210ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 12220ac07820SSatish Balay PetscFree(baij); 12230ac07820SSatish Balay PetscMemcpy(A,B,sizeof(struct _Mat)); 12240ac07820SSatish Balay PetscHeaderDestroy(B); 12250ac07820SSatish Balay } 12260ac07820SSatish Balay return 0; 12270ac07820SSatish Balay } 12280e95ebc0SSatish Balay 12295615d1e5SSatish Balay #undef __FUNC__ 12305615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 12310e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 12320e95ebc0SSatish Balay { 12330e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 12340e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 12350e95ebc0SSatish Balay int ierr,s1,s2,s3; 12360e95ebc0SSatish Balay 12370e95ebc0SSatish Balay if (ll) { 12380e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 12390e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1240e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 12410e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 12420e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 12430e95ebc0SSatish Balay } 1244e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 12450e95ebc0SSatish Balay return 0; 12460e95ebc0SSatish Balay } 12470e95ebc0SSatish Balay 12480ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 12490ac07820SSatish Balay matrix is square and the column and row owerships are identical. 12500ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 12510ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 12520ac07820SSatish Balay routine. 12530ac07820SSatish Balay */ 12545615d1e5SSatish Balay #undef __FUNC__ 12555615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1256ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 12570ac07820SSatish Balay { 12580ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 12590ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 12600ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 12610ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 12620ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 12630ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 12640ac07820SSatish Balay MPI_Comm comm = A->comm; 12650ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 12660ac07820SSatish Balay MPI_Status recv_status,*send_status; 12670ac07820SSatish Balay IS istmp; 12680ac07820SSatish Balay 12690ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 12700ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 12710ac07820SSatish Balay 12720ac07820SSatish Balay /* first count number of contributors to each processor */ 12730ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 12740ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 12750ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 12760ac07820SSatish Balay for ( i=0; i<N; i++ ) { 12770ac07820SSatish Balay idx = rows[i]; 12780ac07820SSatish Balay found = 0; 12790ac07820SSatish Balay for ( j=0; j<size; j++ ) { 12800ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 12810ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 12820ac07820SSatish Balay } 12830ac07820SSatish Balay } 1284e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 12850ac07820SSatish Balay } 12860ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 12870ac07820SSatish Balay 12880ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 12890ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 12900ac07820SSatish Balay MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 12910ac07820SSatish Balay nrecvs = work[rank]; 12920ac07820SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 12930ac07820SSatish Balay nmax = work[rank]; 12940ac07820SSatish Balay PetscFree(work); 12950ac07820SSatish Balay 12960ac07820SSatish Balay /* post receives: */ 12970ac07820SSatish Balay rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 12980ac07820SSatish Balay CHKPTRQ(rvalues); 12990ac07820SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 13000ac07820SSatish Balay CHKPTRQ(recv_waits); 13010ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13020ac07820SSatish Balay MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 13030ac07820SSatish Balay } 13040ac07820SSatish Balay 13050ac07820SSatish Balay /* do sends: 13060ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 13070ac07820SSatish Balay the ith processor 13080ac07820SSatish Balay */ 13090ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 13100ac07820SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 13110ac07820SSatish Balay CHKPTRQ(send_waits); 13120ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 13130ac07820SSatish Balay starts[0] = 0; 13140ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13150ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13160ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 13170ac07820SSatish Balay } 13180ac07820SSatish Balay ISRestoreIndices(is,&rows); 13190ac07820SSatish Balay 13200ac07820SSatish Balay starts[0] = 0; 13210ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13220ac07820SSatish Balay count = 0; 13230ac07820SSatish Balay for ( i=0; i<size; i++ ) { 13240ac07820SSatish Balay if (procs[i]) { 13250ac07820SSatish Balay MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 13260ac07820SSatish Balay } 13270ac07820SSatish Balay } 13280ac07820SSatish Balay PetscFree(starts); 13290ac07820SSatish Balay 13300ac07820SSatish Balay base = owners[rank]*bs; 13310ac07820SSatish Balay 13320ac07820SSatish Balay /* wait on receives */ 13330ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 13340ac07820SSatish Balay source = lens + nrecvs; 13350ac07820SSatish Balay count = nrecvs; slen = 0; 13360ac07820SSatish Balay while (count) { 13370ac07820SSatish Balay MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 13380ac07820SSatish Balay /* unpack receives into our local space */ 13390ac07820SSatish Balay MPI_Get_count(&recv_status,MPI_INT,&n); 13400ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 13410ac07820SSatish Balay lens[imdex] = n; 13420ac07820SSatish Balay slen += n; 13430ac07820SSatish Balay count--; 13440ac07820SSatish Balay } 13450ac07820SSatish Balay PetscFree(recv_waits); 13460ac07820SSatish Balay 13470ac07820SSatish Balay /* move the data into the send scatter */ 13480ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 13490ac07820SSatish Balay count = 0; 13500ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 13510ac07820SSatish Balay values = rvalues + i*nmax; 13520ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 13530ac07820SSatish Balay lrows[count++] = values[j] - base; 13540ac07820SSatish Balay } 13550ac07820SSatish Balay } 13560ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 13570ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 13580ac07820SSatish Balay 13590ac07820SSatish Balay /* actually zap the local rows */ 1360537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 13610ac07820SSatish Balay PLogObjectParent(A,istmp); 13620ac07820SSatish Balay PetscFree(lrows); 13630ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 13640ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 13650ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 13660ac07820SSatish Balay 13670ac07820SSatish Balay /* wait on sends */ 13680ac07820SSatish Balay if (nsends) { 13690ac07820SSatish Balay send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 13700ac07820SSatish Balay CHKPTRQ(send_status); 13710ac07820SSatish Balay MPI_Waitall(nsends,send_waits,send_status); 13720ac07820SSatish Balay PetscFree(send_status); 13730ac07820SSatish Balay } 13740ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 13750ac07820SSatish Balay 13760ac07820SSatish Balay return 0; 13770ac07820SSatish Balay } 1378ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 13795615d1e5SSatish Balay #undef __FUNC__ 13805615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1381ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1382ba4ca20aSSatish Balay { 1383ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1384ba4ca20aSSatish Balay 1385ba4ca20aSSatish Balay if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A); 1386ba4ca20aSSatish Balay else return 0; 1387ba4ca20aSSatish Balay } 13880ac07820SSatish Balay 13895615d1e5SSatish Balay #undef __FUNC__ 13905615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1391ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1392bb5a7306SBarry Smith { 1393bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1394bb5a7306SBarry Smith int ierr; 1395bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1396bb5a7306SBarry Smith return 0; 1397bb5a7306SBarry Smith } 1398bb5a7306SBarry Smith 13990ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 14000ac07820SSatish Balay 140179bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 140279bdfe76SSatish Balay static struct _MatOps MatOps = { 1403bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 14044c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 14054c50302cSBarry Smith 0,0,0,0, 14060ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 14070e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 140858667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 14094c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 14104c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 14114c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 141294a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1413d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1414ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1415bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1416ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 141779bdfe76SSatish Balay 141879bdfe76SSatish Balay 14195615d1e5SSatish Balay #undef __FUNC__ 14205615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 142179bdfe76SSatish Balay /*@C 142279bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 142379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 142479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 142579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 142679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 142779bdfe76SSatish Balay 142879bdfe76SSatish Balay Input Parameters: 142979bdfe76SSatish Balay . comm - MPI communicator 143079bdfe76SSatish Balay . bs - size of blockk 143179bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 143292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 143392e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 143492e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 143592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 143692e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 143779bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 143892e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 143979bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 144079bdfe76SSatish Balay submatrix (same for all local rows) 144192e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 144292e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 144392e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 144492e8d321SLois Curfman McInnes it is zero. 144592e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 144679bdfe76SSatish Balay submatrix (same for all local rows). 144792e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 144892e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 144992e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 145079bdfe76SSatish Balay 145179bdfe76SSatish Balay Output Parameter: 145279bdfe76SSatish Balay . A - the matrix 145379bdfe76SSatish Balay 145479bdfe76SSatish Balay Notes: 145579bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 145679bdfe76SSatish Balay (possibly both). 145779bdfe76SSatish Balay 145879bdfe76SSatish Balay Storage Information: 145979bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 146079bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 146179bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 146279bdfe76SSatish Balay local matrix (a rectangular submatrix). 146379bdfe76SSatish Balay 146479bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 146579bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 146679bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 146779bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 146879bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 146979bdfe76SSatish Balay 147079bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 147179bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 147279bdfe76SSatish Balay 147379bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 147479bdfe76SSatish Balay $ ------------------- 147579bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 147679bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 147779bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 147879bdfe76SSatish Balay $ ------------------- 147979bdfe76SSatish Balay $ 148079bdfe76SSatish Balay 148179bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 148279bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 148379bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 148457b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 148579bdfe76SSatish Balay 148679bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 148779bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 148879bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 148992e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 149092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 14916da5968aSLois Curfman McInnes matrices. 149279bdfe76SSatish Balay 149392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 149479bdfe76SSatish Balay 149579bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 149679bdfe76SSatish Balay @*/ 149779bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 149879bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 149979bdfe76SSatish Balay { 150079bdfe76SSatish Balay Mat B; 150179bdfe76SSatish Balay Mat_MPIBAIJ *b; 15024c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 150379bdfe76SSatish Balay 1504e3372554SBarry Smith if (bs < 1) SETERRQ(1,0,"invalid block size specified"); 150579bdfe76SSatish Balay *A = 0; 150679bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 150779bdfe76SSatish Balay PLogObjectCreate(B); 150879bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 150979bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 151079bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 15114c50302cSBarry Smith MPI_Comm_size(comm,&size); 15124c50302cSBarry Smith if (size == 1) { 15134c50302cSBarry Smith B->ops.getrowij = MatGetRowIJ_MPIBAIJ; 15144c50302cSBarry Smith B->ops.restorerowij = MatRestoreRowIJ_MPIBAIJ; 15154c50302cSBarry Smith B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIBAIJ; 15164c50302cSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_MPIBAIJ; 15174c50302cSBarry Smith B->ops.lufactor = MatLUFactor_MPIBAIJ; 15184c50302cSBarry Smith B->ops.solve = MatSolve_MPIBAIJ; 15194c50302cSBarry Smith B->ops.solveadd = MatSolveAdd_MPIBAIJ; 15204c50302cSBarry Smith B->ops.solvetrans = MatSolveTrans_MPIBAIJ; 15214c50302cSBarry Smith B->ops.solvetransadd = MatSolveTransAdd_MPIBAIJ; 15224c50302cSBarry Smith B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ; 15234c50302cSBarry Smith } 15244c50302cSBarry Smith 152579bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 152679bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 152790f02eecSBarry Smith B->mapping = 0; 152879bdfe76SSatish Balay B->factor = 0; 152979bdfe76SSatish Balay B->assembled = PETSC_FALSE; 153079bdfe76SSatish Balay 1531e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 153279bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 153379bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 153479bdfe76SSatish Balay 153579bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1536e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1537e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1538e3372554SBarry Smith if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified"); 1539cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1540cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 154179bdfe76SSatish Balay 154279bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 154379bdfe76SSatish Balay work[0] = m; work[1] = n; 154479bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 154579bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 154679bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 154779bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 154879bdfe76SSatish Balay } 154979bdfe76SSatish Balay if (m == PETSC_DECIDE) { 155079bdfe76SSatish Balay Mbs = M/bs; 1551e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 155279bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 155379bdfe76SSatish Balay m = mbs*bs; 155479bdfe76SSatish Balay } 155579bdfe76SSatish Balay if (n == PETSC_DECIDE) { 155679bdfe76SSatish Balay Nbs = N/bs; 1557e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 155879bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 155979bdfe76SSatish Balay n = nbs*bs; 156079bdfe76SSatish Balay } 1561e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 156279bdfe76SSatish Balay 156379bdfe76SSatish Balay b->m = m; B->m = m; 156479bdfe76SSatish Balay b->n = n; B->n = n; 156579bdfe76SSatish Balay b->N = N; B->N = N; 156679bdfe76SSatish Balay b->M = M; B->M = M; 156779bdfe76SSatish Balay b->bs = bs; 156879bdfe76SSatish Balay b->bs2 = bs*bs; 156979bdfe76SSatish Balay b->mbs = mbs; 157079bdfe76SSatish Balay b->nbs = nbs; 157179bdfe76SSatish Balay b->Mbs = Mbs; 157279bdfe76SSatish Balay b->Nbs = Nbs; 157379bdfe76SSatish Balay 157479bdfe76SSatish Balay /* build local table of row and column ownerships */ 157579bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 157679bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 15770ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 157879bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 157979bdfe76SSatish Balay b->rowners[0] = 0; 158079bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 158179bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 158279bdfe76SSatish Balay } 158379bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 158479bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 15854fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 15864fa0d573SSatish Balay b->rend_bs = b->rend * bs; 15874fa0d573SSatish Balay 158879bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 158979bdfe76SSatish Balay b->cowners[0] = 0; 159079bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 159179bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 159279bdfe76SSatish Balay } 159379bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 159479bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 15954fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 15964fa0d573SSatish Balay b->cend_bs = b->cend * bs; 159779bdfe76SSatish Balay 159879bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 159979bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 160079bdfe76SSatish Balay PLogObjectParent(B,b->A); 160179bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 160279bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 160379bdfe76SSatish Balay PLogObjectParent(B,b->B); 160479bdfe76SSatish Balay 160579bdfe76SSatish Balay /* build cache for off array entries formed */ 160679bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 160790f02eecSBarry Smith b->donotstash = 0; 160879bdfe76SSatish Balay b->colmap = 0; 160979bdfe76SSatish Balay b->garray = 0; 161079bdfe76SSatish Balay b->roworiented = 1; 161179bdfe76SSatish Balay 161230793edcSSatish Balay /* stuff used in block assembly */ 161330793edcSSatish Balay b->barray = 0; 161430793edcSSatish Balay 161579bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 161679bdfe76SSatish Balay b->lvec = 0; 161779bdfe76SSatish Balay b->Mvctx = 0; 161879bdfe76SSatish Balay 161979bdfe76SSatish Balay /* stuff for MatGetRow() */ 162079bdfe76SSatish Balay b->rowindices = 0; 162179bdfe76SSatish Balay b->rowvalues = 0; 162279bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 162379bdfe76SSatish Balay 162479bdfe76SSatish Balay *A = B; 162579bdfe76SSatish Balay return 0; 162679bdfe76SSatish Balay } 1627026e39d0SSatish Balay 16285615d1e5SSatish Balay #undef __FUNC__ 16295615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 16300ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 16310ac07820SSatish Balay { 16320ac07820SSatish Balay Mat mat; 16330ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 16340ac07820SSatish Balay int ierr, len=0, flg; 16350ac07820SSatish Balay 16360ac07820SSatish Balay *newmat = 0; 16370ac07820SSatish Balay PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm); 16380ac07820SSatish Balay PLogObjectCreate(mat); 16390ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 16400ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 16410ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 16420ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 16430ac07820SSatish Balay mat->factor = matin->factor; 16440ac07820SSatish Balay mat->assembled = PETSC_TRUE; 16450ac07820SSatish Balay 16460ac07820SSatish Balay a->m = mat->m = oldmat->m; 16470ac07820SSatish Balay a->n = mat->n = oldmat->n; 16480ac07820SSatish Balay a->M = mat->M = oldmat->M; 16490ac07820SSatish Balay a->N = mat->N = oldmat->N; 16500ac07820SSatish Balay 16510ac07820SSatish Balay a->bs = oldmat->bs; 16520ac07820SSatish Balay a->bs2 = oldmat->bs2; 16530ac07820SSatish Balay a->mbs = oldmat->mbs; 16540ac07820SSatish Balay a->nbs = oldmat->nbs; 16550ac07820SSatish Balay a->Mbs = oldmat->Mbs; 16560ac07820SSatish Balay a->Nbs = oldmat->Nbs; 16570ac07820SSatish Balay 16580ac07820SSatish Balay a->rstart = oldmat->rstart; 16590ac07820SSatish Balay a->rend = oldmat->rend; 16600ac07820SSatish Balay a->cstart = oldmat->cstart; 16610ac07820SSatish Balay a->cend = oldmat->cend; 16620ac07820SSatish Balay a->size = oldmat->size; 16630ac07820SSatish Balay a->rank = oldmat->rank; 1664e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 16650ac07820SSatish Balay a->rowvalues = 0; 16660ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 166730793edcSSatish Balay a->barray = 0; 16680ac07820SSatish Balay 16690ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 16700ac07820SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 16710ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 16720ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 16730ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 16740ac07820SSatish Balay if (oldmat->colmap) { 16750ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 16760ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 16770ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 16780ac07820SSatish Balay } else a->colmap = 0; 16790ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 16800ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 16810ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 16820ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 16830ac07820SSatish Balay } else a->garray = 0; 16840ac07820SSatish Balay 16850ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 16860ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 16870ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 16880ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 16890ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 16900ac07820SSatish Balay PLogObjectParent(mat,a->A); 16910ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 16920ac07820SSatish Balay PLogObjectParent(mat,a->B); 16930ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 16940ac07820SSatish Balay if (flg) { 16950ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 16960ac07820SSatish Balay } 16970ac07820SSatish Balay *newmat = mat; 16980ac07820SSatish Balay return 0; 16990ac07820SSatish Balay } 170057b952d6SSatish Balay 170157b952d6SSatish Balay #include "sys.h" 170257b952d6SSatish Balay 17035615d1e5SSatish Balay #undef __FUNC__ 17045615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 170557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 170657b952d6SSatish Balay { 170757b952d6SSatish Balay Mat A; 170857b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 170957b952d6SSatish Balay Scalar *vals,*buf; 171057b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 171157b952d6SSatish Balay MPI_Status status; 1712cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 171357b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 171457b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 171557b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 171657b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 171757b952d6SSatish Balay 171857b952d6SSatish Balay 171957b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 172057b952d6SSatish Balay bs2 = bs*bs; 172157b952d6SSatish Balay 172257b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 172357b952d6SSatish Balay if (!rank) { 172457b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 172557b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1726e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 172757b952d6SSatish Balay } 172857b952d6SSatish Balay 172957b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 173057b952d6SSatish Balay M = header[1]; N = header[2]; 173157b952d6SSatish Balay 1732e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 173357b952d6SSatish Balay 173457b952d6SSatish Balay /* 173557b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 173657b952d6SSatish Balay divisible by the blocksize 173757b952d6SSatish Balay */ 173857b952d6SSatish Balay Mbs = M/bs; 173957b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 174057b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 174157b952d6SSatish Balay else Mbs++; 174257b952d6SSatish Balay if (extra_rows &&!rank) { 1743b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 174457b952d6SSatish Balay } 1745537820f0SBarry Smith 174657b952d6SSatish Balay /* determine ownership of all rows */ 174757b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 174857b952d6SSatish Balay m = mbs * bs; 1749cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1750cee3aa6bSSatish Balay browners = rowners + size + 1; 175157b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 175257b952d6SSatish Balay rowners[0] = 0; 1753cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1754cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 175557b952d6SSatish Balay rstart = rowners[rank]; 175657b952d6SSatish Balay rend = rowners[rank+1]; 175757b952d6SSatish Balay 175857b952d6SSatish Balay /* distribute row lengths to all processors */ 175957b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 176057b952d6SSatish Balay if (!rank) { 176157b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 176257b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 176357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 176457b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1765cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1766cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 176757b952d6SSatish Balay PetscFree(sndcounts); 176857b952d6SSatish Balay } 176957b952d6SSatish Balay else { 177057b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 177157b952d6SSatish Balay } 177257b952d6SSatish Balay 177357b952d6SSatish Balay if (!rank) { 177457b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 177557b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 177657b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 177757b952d6SSatish Balay for ( i=0; i<size; i++ ) { 177857b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 177957b952d6SSatish Balay procsnz[i] += rowlengths[j]; 178057b952d6SSatish Balay } 178157b952d6SSatish Balay } 178257b952d6SSatish Balay PetscFree(rowlengths); 178357b952d6SSatish Balay 178457b952d6SSatish Balay /* determine max buffer needed and allocate it */ 178557b952d6SSatish Balay maxnz = 0; 178657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 178757b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 178857b952d6SSatish Balay } 178957b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 179057b952d6SSatish Balay 179157b952d6SSatish Balay /* read in my part of the matrix column indices */ 179257b952d6SSatish Balay nz = procsnz[0]; 179357b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 179457b952d6SSatish Balay mycols = ibuf; 1795cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 179657b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1797cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1798cee3aa6bSSatish Balay 179957b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 180057b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 180157b952d6SSatish Balay nz = procsnz[i]; 180257b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 180357b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 180457b952d6SSatish Balay } 180557b952d6SSatish Balay /* read in the stuff for the last proc */ 180657b952d6SSatish Balay if ( size != 1 ) { 180757b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 180857b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 180957b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1810cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 181157b952d6SSatish Balay } 181257b952d6SSatish Balay PetscFree(cols); 181357b952d6SSatish Balay } 181457b952d6SSatish Balay else { 181557b952d6SSatish Balay /* determine buffer space needed for message */ 181657b952d6SSatish Balay nz = 0; 181757b952d6SSatish Balay for ( i=0; i<m; i++ ) { 181857b952d6SSatish Balay nz += locrowlens[i]; 181957b952d6SSatish Balay } 182057b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 182157b952d6SSatish Balay mycols = ibuf; 182257b952d6SSatish Balay /* receive message of column indices*/ 182357b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 182457b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1825e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 182657b952d6SSatish Balay } 182757b952d6SSatish Balay 182857b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1829cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1830cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 183157b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1832cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 183357b952d6SSatish Balay masked1 = mask + Mbs; 183457b952d6SSatish Balay masked2 = masked1 + Mbs; 183557b952d6SSatish Balay rowcount = 0; nzcount = 0; 183657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 183757b952d6SSatish Balay dcount = 0; 183857b952d6SSatish Balay odcount = 0; 183957b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 184057b952d6SSatish Balay kmax = locrowlens[rowcount]; 184157b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 184257b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 184357b952d6SSatish Balay if (!mask[tmp]) { 184457b952d6SSatish Balay mask[tmp] = 1; 184557b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 184657b952d6SSatish Balay else masked1[dcount++] = tmp; 184757b952d6SSatish Balay } 184857b952d6SSatish Balay } 184957b952d6SSatish Balay rowcount++; 185057b952d6SSatish Balay } 1851cee3aa6bSSatish Balay 185257b952d6SSatish Balay dlens[i] = dcount; 185357b952d6SSatish Balay odlens[i] = odcount; 1854cee3aa6bSSatish Balay 185557b952d6SSatish Balay /* zero out the mask elements we set */ 185657b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 185757b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 185857b952d6SSatish Balay } 1859cee3aa6bSSatish Balay 186057b952d6SSatish Balay /* create our matrix */ 1861537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1862537820f0SBarry Smith CHKERRQ(ierr); 186357b952d6SSatish Balay A = *newmat; 18646d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 186557b952d6SSatish Balay 186657b952d6SSatish Balay if (!rank) { 186757b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 186857b952d6SSatish Balay /* read in my part of the matrix numerical values */ 186957b952d6SSatish Balay nz = procsnz[0]; 187057b952d6SSatish Balay vals = buf; 1871cee3aa6bSSatish Balay mycols = ibuf; 1872cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 187357b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1874cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1875537820f0SBarry Smith 187657b952d6SSatish Balay /* insert into matrix */ 187757b952d6SSatish Balay jj = rstart*bs; 187857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 187957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 188057b952d6SSatish Balay mycols += locrowlens[i]; 188157b952d6SSatish Balay vals += locrowlens[i]; 188257b952d6SSatish Balay jj++; 188357b952d6SSatish Balay } 188457b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 188557b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 188657b952d6SSatish Balay nz = procsnz[i]; 188757b952d6SSatish Balay vals = buf; 188857b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 188957b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 189057b952d6SSatish Balay } 189157b952d6SSatish Balay /* the last proc */ 189257b952d6SSatish Balay if ( size != 1 ){ 189357b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1894cee3aa6bSSatish Balay vals = buf; 189557b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 189657b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1897cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 189857b952d6SSatish Balay } 189957b952d6SSatish Balay PetscFree(procsnz); 190057b952d6SSatish Balay } 190157b952d6SSatish Balay else { 190257b952d6SSatish Balay /* receive numeric values */ 190357b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 190457b952d6SSatish Balay 190557b952d6SSatish Balay /* receive message of values*/ 190657b952d6SSatish Balay vals = buf; 1907cee3aa6bSSatish Balay mycols = ibuf; 190857b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 190957b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1910e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 191157b952d6SSatish Balay 191257b952d6SSatish Balay /* insert into matrix */ 191357b952d6SSatish Balay jj = rstart*bs; 1914cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 191557b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 191657b952d6SSatish Balay mycols += locrowlens[i]; 191757b952d6SSatish Balay vals += locrowlens[i]; 191857b952d6SSatish Balay jj++; 191957b952d6SSatish Balay } 192057b952d6SSatish Balay } 192157b952d6SSatish Balay PetscFree(locrowlens); 192257b952d6SSatish Balay PetscFree(buf); 192357b952d6SSatish Balay PetscFree(ibuf); 192457b952d6SSatish Balay PetscFree(rowners); 192557b952d6SSatish Balay PetscFree(dlens); 1926cee3aa6bSSatish Balay PetscFree(mask); 19276d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 19286d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 192957b952d6SSatish Balay return 0; 193057b952d6SSatish Balay } 193157b952d6SSatish Balay 193257b952d6SSatish Balay 1933