1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*cd1fa5fbSBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.88 1997/11/05 22:32:26 bsmith Exp bsmith $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 143b2fbd54SBarry Smith 15537820f0SBarry Smith /* 16537820f0SBarry Smith Local utility routine that creates a mapping from the global column 1757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1957b952d6SSatish Balay length of colmap equals the global matrix length. 2057b952d6SSatish Balay */ 215615d1e5SSatish Balay #undef __FUNC__ 225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2457b952d6SSatish Balay { 2557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 2857b952d6SSatish Balay 29d64ed03dSBarry Smith PetscFunctionBegin; 30758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3157b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3257b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 343a40ed3dSBarry Smith PetscFunctionReturn(0); 3557b952d6SSatish Balay } 3657b952d6SSatish Balay 3780c1aa95SSatish Balay #define CHUNKSIZE 10 3880c1aa95SSatish Balay 39f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 4080c1aa95SSatish Balay { \ 4180c1aa95SSatish Balay \ 4280c1aa95SSatish Balay brow = row/bs; \ 4380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 4580c1aa95SSatish Balay bcol = col/bs; \ 4680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 47ab26458aSBarry Smith low = 0; high = nrow; \ 48ab26458aSBarry Smith while (high-low > 3) { \ 49ab26458aSBarry Smith t = (low+high)/2; \ 50ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 51ab26458aSBarry Smith else low = t; \ 52ab26458aSBarry Smith } \ 53ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 5480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 5580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 5680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 57eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 58eada6651SSatish Balay else *bap = value; \ 59ac7a638eSSatish Balay goto a_noinsert; \ 6080c1aa95SSatish Balay } \ 6180c1aa95SSatish Balay } \ 6289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 6311ebbc71SLois Curfman McInnes else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 6480c1aa95SSatish Balay if (nrow >= rmax) { \ 6580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 6680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 6780c1aa95SSatish Balay Scalar *new_a; \ 6880c1aa95SSatish Balay \ 6911ebbc71SLois Curfman McInnes if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 7089280ab3SLois Curfman McInnes \ 7180c1aa95SSatish Balay /* malloc new storage space */ \ 7280c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 7380c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 7480c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 7580c1aa95SSatish Balay new_i = new_j + new_nz; \ 7680c1aa95SSatish Balay \ 7780c1aa95SSatish Balay /* copy over old data into new slots */ \ 7880c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 7980c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 8080c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 8180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 8280c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 8380c1aa95SSatish Balay len*sizeof(int)); \ 8480c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 8580c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 8680c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 8780c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 8880c1aa95SSatish Balay /* free up old matrix storage */ \ 8980c1aa95SSatish Balay PetscFree(a->a); \ 9080c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 9180c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 9280c1aa95SSatish Balay a->singlemalloc = 1; \ 9380c1aa95SSatish Balay \ 9480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 9680c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 9780c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 9880c1aa95SSatish Balay a->reallocs++; \ 9980c1aa95SSatish Balay a->nz++; \ 10080c1aa95SSatish Balay } \ 10180c1aa95SSatish Balay N = nrow++ - 1; \ 10280c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 10380c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 10480c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 10580c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 10680c1aa95SSatish Balay } \ 10780c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 10880c1aa95SSatish Balay rp[_i] = bcol; \ 10980c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 110ac7a638eSSatish Balay a_noinsert:; \ 11180c1aa95SSatish Balay ailen[brow] = nrow; \ 11280c1aa95SSatish Balay } 11357b952d6SSatish Balay 114ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115ac7a638eSSatish Balay { \ 116ac7a638eSSatish Balay \ 117ac7a638eSSatish Balay brow = row/bs; \ 118ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 120ac7a638eSSatish Balay bcol = col/bs; \ 121ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 122ac7a638eSSatish Balay low = 0; high = nrow; \ 123ac7a638eSSatish Balay while (high-low > 3) { \ 124ac7a638eSSatish Balay t = (low+high)/2; \ 125ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 126ac7a638eSSatish Balay else low = t; \ 127ac7a638eSSatish Balay } \ 128ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 129ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 130ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 131ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 132ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 133ac7a638eSSatish Balay else *bap = value; \ 134ac7a638eSSatish Balay goto b_noinsert; \ 135ac7a638eSSatish Balay } \ 136ac7a638eSSatish Balay } \ 13789280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 13811ebbc71SLois Curfman McInnes else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 139ac7a638eSSatish Balay if (nrow >= rmax) { \ 140ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14174c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142ac7a638eSSatish Balay Scalar *new_a; \ 143ac7a638eSSatish Balay \ 14411ebbc71SLois Curfman McInnes if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 14589280ab3SLois Curfman McInnes \ 146ac7a638eSSatish Balay /* malloc new storage space */ \ 14774c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 150ac7a638eSSatish Balay new_i = new_j + new_nz; \ 151ac7a638eSSatish Balay \ 152ac7a638eSSatish Balay /* copy over old data into new slots */ \ 153ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 15474c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158ac7a638eSSatish Balay len*sizeof(int)); \ 159ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163ac7a638eSSatish Balay /* free up old matrix storage */ \ 16474c639caSSatish Balay PetscFree(b->a); \ 16574c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 16774c639caSSatish Balay b->singlemalloc = 1; \ 168ac7a638eSSatish Balay \ 169ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 17174c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 17274c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 17374c639caSSatish Balay b->reallocs++; \ 17474c639caSSatish Balay b->nz++; \ 175ac7a638eSSatish Balay } \ 176ac7a638eSSatish Balay N = nrow++ - 1; \ 177ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 178ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 179ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 180ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183ac7a638eSSatish Balay rp[_i] = bcol; \ 184ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 185ac7a638eSSatish Balay b_noinsert:; \ 186ac7a638eSSatish Balay bilen[brow] = nrow; \ 187ac7a638eSSatish Balay } 188ac7a638eSSatish Balay 1895615d1e5SSatish Balay #undef __FUNC__ 1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 19257b952d6SSatish Balay { 19357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 19457b952d6SSatish Balay Scalar value; 1954fa0d573SSatish Balay int ierr,i,j,row,col; 1964fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1974fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1984fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 19957b952d6SSatish Balay 200eada6651SSatish Balay /* Some Variables required in the macro */ 20180c1aa95SSatish Balay Mat A = baij->A; 20280c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204ac7a638eSSatish Balay Scalar *aa=a->a; 205ac7a638eSSatish Balay 206ac7a638eSSatish Balay Mat B = baij->B; 207ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209ac7a638eSSatish Balay Scalar *ba=b->a; 210ac7a638eSSatish Balay 211ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 213ac7a638eSSatish Balay Scalar *ap,*bap; 21480c1aa95SSatish Balay 215d64ed03dSBarry Smith PetscFunctionBegin; 21657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 218e3372554SBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 219e3372554SBarry Smith if (im[i] >= baij->M) SETERRQ(1,0,"Row too large"); 220639f9d9dSBarry Smith #endif 22157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 22257b952d6SSatish Balay row = im[i] - rstart_orig; 22357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 22457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 22557b952d6SSatish Balay col = in[j] - cstart_orig; 22657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 22880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 22957b952d6SSatish Balay } 2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 231e3372554SBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 232e3372554SBarry Smith else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");} 233639f9d9dSBarry Smith #endif 23457b952d6SSatish Balay else { 23557b952d6SSatish Balay if (mat->was_assembled) { 236905e6a2fSBarry Smith if (!baij->colmap) { 237905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238905e6a2fSBarry Smith } 239905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 24057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 24157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 24257b952d6SSatish Balay col = in[j]; 2439bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2449bf004c3SSatish Balay B = baij->B; 2459bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2469bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2479bf004c3SSatish Balay ba=b->a; 24857b952d6SSatish Balay } 249d64ed03dSBarry Smith } else col = in[j]; 25057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25357b952d6SSatish Balay } 25457b952d6SSatish Balay } 255d64ed03dSBarry Smith } else { 25690f02eecSBarry Smith if (roworiented && !baij->donotstash) { 25757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258d64ed03dSBarry Smith } else { 25990f02eecSBarry Smith if (!baij->donotstash) { 26057b952d6SSatish Balay row = im[i]; 26157b952d6SSatish Balay for ( j=0; j<n; j++ ) { 26257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 26357b952d6SSatish Balay } 26457b952d6SSatish Balay } 26557b952d6SSatish Balay } 26657b952d6SSatish Balay } 26790f02eecSBarry Smith } 2683a40ed3dSBarry Smith PetscFunctionReturn(0); 26957b952d6SSatish Balay } 27057b952d6SSatish Balay 271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272ab26458aSBarry Smith #undef __FUNC__ 273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275ab26458aSBarry Smith { 276ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27730793edcSSatish Balay Scalar *value,*barray=baij->barray; 278abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 279ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 280ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 281ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282ab26458aSBarry Smith 28330793edcSSatish Balay if(!barray) { 28447513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 28530793edcSSatish Balay } 28630793edcSSatish Balay 287ab26458aSBarry Smith if (roworiented) { 288ab26458aSBarry Smith stepval = (n-1)*bs; 289ab26458aSBarry Smith } else { 290ab26458aSBarry Smith stepval = (m-1)*bs; 291ab26458aSBarry Smith } 292ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 294ab26458aSBarry Smith if (im[i] < 0) SETERRQ(1,0,"Negative row"); 295ab26458aSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large"); 296ab26458aSBarry Smith #endif 297ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 298ab26458aSBarry Smith row = im[i] - rstart; 299ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 30015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 30115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 30215b57d14SSatish Balay barray = v + i*bs2; 30315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 30415b57d14SSatish Balay barray = v + j*bs2; 30515b57d14SSatish Balay } else { /* Here a copy is required */ 306ab26458aSBarry Smith if (roworiented) { 307ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 308ab26458aSBarry Smith } else { 309ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 310abef11f7SSatish Balay } 31147513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 31247513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 31330793edcSSatish Balay *barray++ = *value++; 31447513183SBarry Smith } 31547513183SBarry Smith } 31630793edcSSatish Balay barray -=bs2; 31715b57d14SSatish Balay } 318abef11f7SSatish Balay 319abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 320abef11f7SSatish Balay col = in[j] - cstart; 32130793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322ab26458aSBarry Smith } 3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 324ab26458aSBarry Smith else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 32547513183SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Column too large");} 326ab26458aSBarry Smith #endif 327ab26458aSBarry Smith else { 328ab26458aSBarry Smith if (mat->was_assembled) { 329ab26458aSBarry Smith if (!baij->colmap) { 330ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331ab26458aSBarry Smith } 332a5eb4965SSatish Balay 3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 334a5eb4965SSatish Balay if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(1,0,"Incorrect colmap");} 335a5eb4965SSatish Balay #endif 336a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 337ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339ab26458aSBarry Smith col = in[j]; 340ab26458aSBarry Smith } 341ab26458aSBarry Smith } 342ab26458aSBarry Smith else col = in[j]; 34330793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344ab26458aSBarry Smith } 345ab26458aSBarry Smith } 346d64ed03dSBarry Smith } else { 347ab26458aSBarry Smith if (!baij->donotstash) { 348ab26458aSBarry Smith if (roworiented ) { 349abef11f7SSatish Balay row = im[i]*bs; 350abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 351abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 352abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 353abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355abef11f7SSatish Balay } 356ab26458aSBarry Smith } 357ab26458aSBarry Smith } 358d64ed03dSBarry Smith } else { 359ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 360abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 361abef11f7SSatish Balay col = in[j]*bs; 362abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 363abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365ab26458aSBarry Smith } 366ab26458aSBarry Smith } 367ab26458aSBarry Smith } 368abef11f7SSatish Balay } 369abef11f7SSatish Balay } 370ab26458aSBarry Smith } 371ab26458aSBarry Smith } 3723a40ed3dSBarry Smith PetscFunctionReturn(0); 373ab26458aSBarry Smith } 374ab26458aSBarry Smith 3755615d1e5SSatish Balay #undef __FUNC__ 3765615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 377ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 378d6de1c52SSatish Balay { 379d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 380d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 381d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 382d6de1c52SSatish Balay 383d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 384e3372554SBarry Smith if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 385e3372554SBarry Smith if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large"); 386d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 387d6de1c52SSatish Balay row = idxm[i] - bsrstart; 388d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 389e3372554SBarry Smith if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 390e3372554SBarry Smith if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large"); 391d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 392d6de1c52SSatish Balay col = idxn[j] - bscstart; 393d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 394d64ed03dSBarry Smith } else { 395905e6a2fSBarry Smith if (!baij->colmap) { 396905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 397905e6a2fSBarry Smith } 398e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 399dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 400d9d09a02SSatish Balay else { 401dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 402d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 403d6de1c52SSatish Balay } 404d6de1c52SSatish Balay } 405d6de1c52SSatish Balay } 406d64ed03dSBarry Smith } else { 407e3372554SBarry Smith SETERRQ(1,0,"Only local values currently supported"); 408d6de1c52SSatish Balay } 409d6de1c52SSatish Balay } 4103a40ed3dSBarry Smith PetscFunctionReturn(0); 411d6de1c52SSatish Balay } 412d6de1c52SSatish Balay 4135615d1e5SSatish Balay #undef __FUNC__ 4145615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 415ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 416d6de1c52SSatish Balay { 417d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 418d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 419acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 420d6de1c52SSatish Balay double sum = 0.0; 421d6de1c52SSatish Balay Scalar *v; 422d6de1c52SSatish Balay 423d64ed03dSBarry Smith PetscFunctionBegin; 424d6de1c52SSatish Balay if (baij->size == 1) { 425d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 426d6de1c52SSatish Balay } else { 427d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 428d6de1c52SSatish Balay v = amat->a; 429d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 4303a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 431d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 432d6de1c52SSatish Balay #else 433d6de1c52SSatish Balay sum += (*v)*(*v); v++; 434d6de1c52SSatish Balay #endif 435d6de1c52SSatish Balay } 436d6de1c52SSatish Balay v = bmat->a; 437d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 4383a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 439d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 440d6de1c52SSatish Balay #else 441d6de1c52SSatish Balay sum += (*v)*(*v); v++; 442d6de1c52SSatish Balay #endif 443d6de1c52SSatish Balay } 444ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 445d6de1c52SSatish Balay *norm = sqrt(*norm); 446d64ed03dSBarry Smith } else { 447e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 448d6de1c52SSatish Balay } 449d64ed03dSBarry Smith } 4503a40ed3dSBarry Smith PetscFunctionReturn(0); 451d6de1c52SSatish Balay } 45257b952d6SSatish Balay 4535615d1e5SSatish Balay #undef __FUNC__ 4545615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 455ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 45657b952d6SSatish Balay { 45757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 45857b952d6SSatish Balay MPI_Comm comm = mat->comm; 45957b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 46057b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 46157b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 46257b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 46357b952d6SSatish Balay InsertMode addv; 46457b952d6SSatish Balay Scalar *rvalues,*svalues; 46557b952d6SSatish Balay 466d64ed03dSBarry Smith PetscFunctionBegin; 46757b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 468ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 46957b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 470e3372554SBarry Smith SETERRQ(1,0,"Some processors inserted others added"); 47157b952d6SSatish Balay } 472e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 47357b952d6SSatish Balay 47457b952d6SSatish Balay /* first count number of contributors to each processor */ 47557b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 47657b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 47757b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 47857b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 47957b952d6SSatish Balay idx = baij->stash.idx[i]; 48057b952d6SSatish Balay for ( j=0; j<size; j++ ) { 48157b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 48257b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 48357b952d6SSatish Balay } 48457b952d6SSatish Balay } 48557b952d6SSatish Balay } 48657b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 48757b952d6SSatish Balay 48857b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 48957b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 490ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 49157b952d6SSatish Balay nreceives = work[rank]; 492ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 49357b952d6SSatish Balay nmax = work[rank]; 49457b952d6SSatish Balay PetscFree(work); 49557b952d6SSatish Balay 49657b952d6SSatish Balay /* post receives: 49757b952d6SSatish Balay 1) each message will consist of ordered pairs 49857b952d6SSatish Balay (global index,value) we store the global index as a double 49957b952d6SSatish Balay to simplify the message passing. 50057b952d6SSatish Balay 2) since we don't know how long each individual message is we 50157b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 50257b952d6SSatish Balay this is a lot of wasted space. 50357b952d6SSatish Balay 50457b952d6SSatish Balay 50557b952d6SSatish Balay This could be done better. 50657b952d6SSatish Balay */ 507f8abc2e8SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 508f8abc2e8SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 50957b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 510ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 51157b952d6SSatish Balay } 51257b952d6SSatish Balay 51357b952d6SSatish Balay /* do sends: 51457b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 51557b952d6SSatish Balay the ith processor 51657b952d6SSatish Balay */ 51757b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 518d64ed03dSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 51957b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 52057b952d6SSatish Balay starts[0] = 0; 52157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 52257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 52357b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 52457b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 52557b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 52657b952d6SSatish Balay } 52757b952d6SSatish Balay PetscFree(owner); 52857b952d6SSatish Balay starts[0] = 0; 52957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 53057b952d6SSatish Balay count = 0; 53157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 53257b952d6SSatish Balay if (procs[i]) { 533ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 53457b952d6SSatish Balay } 53557b952d6SSatish Balay } 53657b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 53757b952d6SSatish Balay 53857b952d6SSatish Balay /* Free cache space */ 539*cd1fa5fbSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 54057b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 54157b952d6SSatish Balay 54257b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 54357b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 54457b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 54557b952d6SSatish Balay baij->rmax = nmax; 54657b952d6SSatish Balay 5473a40ed3dSBarry Smith PetscFunctionReturn(0); 54857b952d6SSatish Balay } 549596b8d2eSBarry Smith #include <math.h> 550596b8d2eSBarry Smith #define HASH_KEY 0.6180339887 551bd7f49f5SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 552bd7f49f5SSatish Balay #define HASH2(size,key) ((int)((size)*fmod(((key+0.5)*HASH_KEY),1))) 55357b952d6SSatish Balay 554bd7f49f5SSatish Balay 555bd7f49f5SSatish Balay int CreateHashTable(Mat mat,double factor) 556596b8d2eSBarry Smith { 557596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 558596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 559596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 560bd7f49f5SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,h2,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 561bd7f49f5SSatish Balay int size=(int)(factor*nz),ct=0,max1=0,max2=0; 56252c87ff2SSatish Balay /* Scalar *aa=a->a,*ba=b->a; */ 563596b8d2eSBarry Smith double key; 564596b8d2eSBarry Smith static double *HT; 565596b8d2eSBarry Smith static int flag=1; 566bd7f49f5SSatish Balay extern int PetscGlobalRank; 567d64ed03dSBarry Smith 568d64ed03dSBarry Smith PetscFunctionBegin; 569bd7f49f5SSatish Balay flag = 1; 570596b8d2eSBarry Smith /* Allocate Memory for Hash Table */ 571596b8d2eSBarry Smith if (flag) { 572596b8d2eSBarry Smith HT = (double*)PetscMalloc(size*sizeof(double)); 573596b8d2eSBarry Smith flag = 0; 574596b8d2eSBarry Smith } 575596b8d2eSBarry Smith PetscMemzero(HT,size*sizeof(double)); 576596b8d2eSBarry Smith 577596b8d2eSBarry Smith /* Loop Over A */ 578596b8d2eSBarry Smith for ( i=0; i<a->n; i++ ) { 579596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 580596b8d2eSBarry Smith key = i*baij->n+aj[j]+1; 581596b8d2eSBarry Smith h1 = HASH1(size,key); 582bd7f49f5SSatish Balay h2 = HASH2(size,key); 583596b8d2eSBarry Smith 584596b8d2eSBarry Smith for ( k=1; k<size; k++ ){ 585596b8d2eSBarry Smith if (HT[(h1*k)%size] == 0.0) { 586596b8d2eSBarry Smith HT[(h1*k)%size] = key; 587596b8d2eSBarry Smith break; 588596b8d2eSBarry Smith } else ct++; 589596b8d2eSBarry Smith } 590bd7f49f5SSatish Balay if (k> max1) max1 =k; 591596b8d2eSBarry Smith } 592596b8d2eSBarry Smith } 593596b8d2eSBarry Smith /* Loop Over B */ 594596b8d2eSBarry Smith for ( i=0; i<b->n; i++ ) { 595596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 596596b8d2eSBarry Smith key = i*b->n+bj[j]+1; 597596b8d2eSBarry Smith h1 = HASH1(size,key); 598596b8d2eSBarry Smith for ( k=1; k<size; k++ ){ 599596b8d2eSBarry Smith if (HT[(h1*k)%size] == 0.0) { 600596b8d2eSBarry Smith HT[(h1*k)%size] = key; 601596b8d2eSBarry Smith break; 602596b8d2eSBarry Smith } else ct++; 603596b8d2eSBarry Smith } 604bd7f49f5SSatish Balay if (k> max2) max2 =k; 605596b8d2eSBarry Smith } 606596b8d2eSBarry Smith } 607596b8d2eSBarry Smith 608596b8d2eSBarry Smith /* Print Summary */ 609596b8d2eSBarry Smith for ( i=0,key=0.0,j=0; i<size; i++) 610596b8d2eSBarry Smith if (HT[i]) {j++;} 611596b8d2eSBarry Smith 612bd7f49f5SSatish Balay printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 613bd7f49f5SSatish Balay PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j)/j,j); 614bd7f49f5SSatish Balay PetscFree(HT); 6153a40ed3dSBarry Smith PetscFunctionReturn(0); 616596b8d2eSBarry Smith } 61757b952d6SSatish Balay 6185615d1e5SSatish Balay #undef __FUNC__ 6195615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 620ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 62157b952d6SSatish Balay { 62257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 62357b952d6SSatish Balay MPI_Status *send_status,recv_status; 62457b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 625596b8d2eSBarry Smith int bs=baij->bs,row,col,other_disassembled,flg; 62657b952d6SSatish Balay Scalar *values,val; 627e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 62857b952d6SSatish Balay 629d64ed03dSBarry Smith PetscFunctionBegin; 63057b952d6SSatish Balay /* wait on receives */ 63157b952d6SSatish Balay while (count) { 632ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 63357b952d6SSatish Balay /* unpack receives into our local space */ 63457b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 635ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 63657b952d6SSatish Balay n = n/3; 63757b952d6SSatish Balay for ( i=0; i<n; i++ ) { 63857b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 63957b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 64057b952d6SSatish Balay val = values[3*i+2]; 64157b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 64257b952d6SSatish Balay col -= baij->cstart*bs; 6436fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 644d64ed03dSBarry Smith } else { 64557b952d6SSatish Balay if (mat->was_assembled) { 646905e6a2fSBarry Smith if (!baij->colmap) { 647905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 648905e6a2fSBarry Smith } 649a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 65057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 65157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 65257b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 65357b952d6SSatish Balay } 65457b952d6SSatish Balay } 6556fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 65657b952d6SSatish Balay } 65757b952d6SSatish Balay } 65857b952d6SSatish Balay count--; 65957b952d6SSatish Balay } 66057b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 66157b952d6SSatish Balay 66257b952d6SSatish Balay /* wait on sends */ 66357b952d6SSatish Balay if (baij->nsends) { 664d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 665ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 66657b952d6SSatish Balay PetscFree(send_status); 66757b952d6SSatish Balay } 66857b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 66957b952d6SSatish Balay 67057b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 67157b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 67257b952d6SSatish Balay 67357b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 67457b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 675ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 67657b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 67757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 67857b952d6SSatish Balay } 67957b952d6SSatish Balay 6806d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 68157b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 68257b952d6SSatish Balay } 68357b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 68457b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 68557b952d6SSatish Balay 686596b8d2eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 687bd7f49f5SSatish Balay if (flg) { 688bd7f49f5SSatish Balay double fact; 689bd7f49f5SSatish Balay for ( fact=1.2; fact<2.0; fact +=0.05) CreateHashTable(mat,fact); 690bd7f49f5SSatish Balay } 69157b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 6923a40ed3dSBarry Smith PetscFunctionReturn(0); 69357b952d6SSatish Balay } 69457b952d6SSatish Balay 6955615d1e5SSatish Balay #undef __FUNC__ 6965615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 69757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 69857b952d6SSatish Balay { 69957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 70057b952d6SSatish Balay int ierr; 70157b952d6SSatish Balay 702d64ed03dSBarry Smith PetscFunctionBegin; 70357b952d6SSatish Balay if (baij->size == 1) { 70457b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 705d64ed03dSBarry Smith } else SETERRQ(1,0,"Only uniprocessor output supported"); 7063a40ed3dSBarry Smith PetscFunctionReturn(0); 70757b952d6SSatish Balay } 70857b952d6SSatish Balay 7095615d1e5SSatish Balay #undef __FUNC__ 7105615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 71157b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 71257b952d6SSatish Balay { 71357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 714cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 71557b952d6SSatish Balay FILE *fd; 71657b952d6SSatish Balay ViewerType vtype; 71757b952d6SSatish Balay 718d64ed03dSBarry Smith PetscFunctionBegin; 71957b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 72057b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 72157b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 722639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7234e220ebcSLois Curfman McInnes MatInfo info; 72457b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 72557b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7264e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 72757b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 72857b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 7294e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 7304e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 7314e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 7324e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 7334e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 7344e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 73557b952d6SSatish Balay fflush(fd); 73657b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 73757b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 7383a40ed3dSBarry Smith PetscFunctionReturn(0); 739d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 740bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 7413a40ed3dSBarry Smith PetscFunctionReturn(0); 74257b952d6SSatish Balay } 74357b952d6SSatish Balay } 74457b952d6SSatish Balay 74557b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 74657b952d6SSatish Balay Draw draw; 74757b952d6SSatish Balay PetscTruth isnull; 74857b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 7493a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 75057b952d6SSatish Balay } 75157b952d6SSatish Balay 75257b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 75357b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 75457b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 75557b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 75657b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 75757b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 75857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 75957b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 76057b952d6SSatish Balay fflush(fd); 76157b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 762d64ed03dSBarry Smith } else { 76357b952d6SSatish Balay int size = baij->size; 76457b952d6SSatish Balay rank = baij->rank; 76557b952d6SSatish Balay if (size == 1) { 76657b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 767d64ed03dSBarry Smith } else { 76857b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 76957b952d6SSatish Balay Mat A; 77057b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 77157b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 77257b952d6SSatish Balay int mbs=baij->mbs; 77357b952d6SSatish Balay Scalar *a; 77457b952d6SSatish Balay 77557b952d6SSatish Balay if (!rank) { 77655843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 777d64ed03dSBarry Smith } else { 77855843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 77957b952d6SSatish Balay } 78057b952d6SSatish Balay PLogObjectParent(mat,A); 78157b952d6SSatish Balay 78257b952d6SSatish Balay /* copy over the A part */ 78357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 78457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 78557b952d6SSatish Balay row = baij->rstart; 78657b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 78757b952d6SSatish Balay 78857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 78957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 79057b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 79157b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 79257b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 79357b952d6SSatish Balay for (k=0; k<bs; k++ ) { 794cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 795cee3aa6bSSatish Balay col++; a += bs; 79657b952d6SSatish Balay } 79757b952d6SSatish Balay } 79857b952d6SSatish Balay } 79957b952d6SSatish Balay /* copy over the B part */ 80057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 80157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 80257b952d6SSatish Balay row = baij->rstart*bs; 80357b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 80457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 80557b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 80657b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 80757b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 80857b952d6SSatish Balay for (k=0; k<bs; k++ ) { 809cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 810cee3aa6bSSatish Balay col++; a += bs; 81157b952d6SSatish Balay } 81257b952d6SSatish Balay } 81357b952d6SSatish Balay } 81457b952d6SSatish Balay PetscFree(rvals); 8156d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8166d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 81755843e3eSBarry Smith /* 81855843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 81955843e3eSBarry Smith synchronized across all processors that share the Draw object 82055843e3eSBarry Smith */ 82155843e3eSBarry Smith if (!rank || vtype == DRAW_VIEWER) { 82257b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 82357b952d6SSatish Balay } 82457b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 82557b952d6SSatish Balay } 82657b952d6SSatish Balay } 8273a40ed3dSBarry Smith PetscFunctionReturn(0); 82857b952d6SSatish Balay } 82957b952d6SSatish Balay 83057b952d6SSatish Balay 83157b952d6SSatish Balay 8325615d1e5SSatish Balay #undef __FUNC__ 8335615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 834ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 83557b952d6SSatish Balay { 83657b952d6SSatish Balay Mat mat = (Mat) obj; 83757b952d6SSatish Balay int ierr; 83857b952d6SSatish Balay ViewerType vtype; 83957b952d6SSatish Balay 840d64ed03dSBarry Smith PetscFunctionBegin; 84157b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 84257b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 84357b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 84457b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 8453a40ed3dSBarry Smith } else if (vtype == BINARY_FILE_VIEWER) { 8463a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 84757b952d6SSatish Balay } 8483a40ed3dSBarry Smith PetscFunctionReturn(0); 84957b952d6SSatish Balay } 85057b952d6SSatish Balay 8515615d1e5SSatish Balay #undef __FUNC__ 8525615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 853ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 85479bdfe76SSatish Balay { 85579bdfe76SSatish Balay Mat mat = (Mat) obj; 85679bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 85779bdfe76SSatish Balay int ierr; 85879bdfe76SSatish Balay 859d64ed03dSBarry Smith PetscFunctionBegin; 8603a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 86179bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 86279bdfe76SSatish Balay #endif 86379bdfe76SSatish Balay 86483e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 86579bdfe76SSatish Balay PetscFree(baij->rowners); 86679bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 86779bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 86879bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 86979bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 87079bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 87179bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 87279bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 87330793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 87479bdfe76SSatish Balay PetscFree(baij); 87579bdfe76SSatish Balay PLogObjectDestroy(mat); 87679bdfe76SSatish Balay PetscHeaderDestroy(mat); 8773a40ed3dSBarry Smith PetscFunctionReturn(0); 87879bdfe76SSatish Balay } 87979bdfe76SSatish Balay 8805615d1e5SSatish Balay #undef __FUNC__ 8815615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 882ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 883cee3aa6bSSatish Balay { 884cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 88547b4a8eaSLois Curfman McInnes int ierr, nt; 886cee3aa6bSSatish Balay 887d64ed03dSBarry Smith PetscFunctionBegin; 888c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 88947b4a8eaSLois Curfman McInnes if (nt != a->n) { 890ab26458aSBarry Smith SETERRQ(1,0,"Incompatible partition of A and xx"); 89147b4a8eaSLois Curfman McInnes } 892c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 89347b4a8eaSLois Curfman McInnes if (nt != a->m) { 894e3372554SBarry Smith SETERRQ(1,0,"Incompatible parition of A and yy"); 89547b4a8eaSLois Curfman McInnes } 89643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 897cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 89843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 899cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 90043a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 9013a40ed3dSBarry Smith PetscFunctionReturn(0); 902cee3aa6bSSatish Balay } 903cee3aa6bSSatish Balay 9045615d1e5SSatish Balay #undef __FUNC__ 9055615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 906ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 907cee3aa6bSSatish Balay { 908cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 909cee3aa6bSSatish Balay int ierr; 910d64ed03dSBarry Smith 911d64ed03dSBarry Smith PetscFunctionBegin; 91243a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 913cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 91443a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 915cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 9163a40ed3dSBarry Smith PetscFunctionReturn(0); 917cee3aa6bSSatish Balay } 918cee3aa6bSSatish Balay 9195615d1e5SSatish Balay #undef __FUNC__ 9205615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 921ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 922cee3aa6bSSatish Balay { 923cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 924cee3aa6bSSatish Balay int ierr; 925cee3aa6bSSatish Balay 926d64ed03dSBarry Smith PetscFunctionBegin; 927cee3aa6bSSatish Balay /* do nondiagonal part */ 928cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 929cee3aa6bSSatish Balay /* send it on its way */ 930537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 931cee3aa6bSSatish Balay /* do local part */ 932cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 933cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 934cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 935cee3aa6bSSatish Balay /* but is not perhaps always true. */ 936639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 9373a40ed3dSBarry Smith PetscFunctionReturn(0); 938cee3aa6bSSatish Balay } 939cee3aa6bSSatish Balay 9405615d1e5SSatish Balay #undef __FUNC__ 9415615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 942ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 943cee3aa6bSSatish Balay { 944cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 945cee3aa6bSSatish Balay int ierr; 946cee3aa6bSSatish Balay 947d64ed03dSBarry Smith PetscFunctionBegin; 948cee3aa6bSSatish Balay /* do nondiagonal part */ 949cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 950cee3aa6bSSatish Balay /* send it on its way */ 951537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 952cee3aa6bSSatish Balay /* do local part */ 953cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 954cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 955cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 956cee3aa6bSSatish Balay /* but is not perhaps always true. */ 957537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 9583a40ed3dSBarry Smith PetscFunctionReturn(0); 959cee3aa6bSSatish Balay } 960cee3aa6bSSatish Balay 961cee3aa6bSSatish Balay /* 962cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 963cee3aa6bSSatish Balay diagonal block 964cee3aa6bSSatish Balay */ 9655615d1e5SSatish Balay #undef __FUNC__ 9665615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 967ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 968cee3aa6bSSatish Balay { 969cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 9703a40ed3dSBarry Smith int ierr; 971d64ed03dSBarry Smith 972d64ed03dSBarry Smith PetscFunctionBegin; 9733a40ed3dSBarry Smith if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 9743a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 9753a40ed3dSBarry Smith PetscFunctionReturn(0); 976cee3aa6bSSatish Balay } 977cee3aa6bSSatish Balay 9785615d1e5SSatish Balay #undef __FUNC__ 9795615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 980ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 981cee3aa6bSSatish Balay { 982cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 983cee3aa6bSSatish Balay int ierr; 984d64ed03dSBarry Smith 985d64ed03dSBarry Smith PetscFunctionBegin; 986cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 987cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 9883a40ed3dSBarry Smith PetscFunctionReturn(0); 989cee3aa6bSSatish Balay } 990026e39d0SSatish Balay 9915615d1e5SSatish Balay #undef __FUNC__ 9925615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 993ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 99457b952d6SSatish Balay { 99557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 996d64ed03dSBarry Smith 997d64ed03dSBarry Smith PetscFunctionBegin; 998bd7f49f5SSatish Balay if (m) *m = mat->M; 999bd7f49f5SSatish Balay if (n) *n = mat->N; 10003a40ed3dSBarry Smith PetscFunctionReturn(0); 100157b952d6SSatish Balay } 100257b952d6SSatish Balay 10035615d1e5SSatish Balay #undef __FUNC__ 10045615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1005ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 100657b952d6SSatish Balay { 100757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1008d64ed03dSBarry Smith 1009d64ed03dSBarry Smith PetscFunctionBegin; 101057b952d6SSatish Balay *m = mat->m; *n = mat->N; 10113a40ed3dSBarry Smith PetscFunctionReturn(0); 101257b952d6SSatish Balay } 101357b952d6SSatish Balay 10145615d1e5SSatish Balay #undef __FUNC__ 10155615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1016ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 101757b952d6SSatish Balay { 101857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1019d64ed03dSBarry Smith 1020d64ed03dSBarry Smith PetscFunctionBegin; 102157b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 10223a40ed3dSBarry Smith PetscFunctionReturn(0); 102357b952d6SSatish Balay } 102457b952d6SSatish Balay 1025acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1026acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1027acdf5bf4SSatish Balay 10285615d1e5SSatish Balay #undef __FUNC__ 10295615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1030acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1031acdf5bf4SSatish Balay { 1032acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1033acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1034acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1035d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1036d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1037acdf5bf4SSatish Balay 1038d64ed03dSBarry Smith PetscFunctionBegin; 1039e3372554SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1040acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1041acdf5bf4SSatish Balay 1042acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1043acdf5bf4SSatish Balay /* 1044acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1045acdf5bf4SSatish Balay */ 1046acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1047bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1048bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1049acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1050acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1051acdf5bf4SSatish Balay } 1052acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1053acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1054acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1055acdf5bf4SSatish Balay } 1056acdf5bf4SSatish Balay 1057e3372554SBarry Smith if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows") 1058d9d09a02SSatish Balay lrow = row - brstart; 1059acdf5bf4SSatish Balay 1060acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1061acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1062acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1063acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1064acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1065acdf5bf4SSatish Balay nztot = nzA + nzB; 1066acdf5bf4SSatish Balay 1067acdf5bf4SSatish Balay cmap = mat->garray; 1068acdf5bf4SSatish Balay if (v || idx) { 1069acdf5bf4SSatish Balay if (nztot) { 1070acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1071acdf5bf4SSatish Balay int imark = -1; 1072acdf5bf4SSatish Balay if (v) { 1073acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1074acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1075d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1076acdf5bf4SSatish Balay else break; 1077acdf5bf4SSatish Balay } 1078acdf5bf4SSatish Balay imark = i; 1079acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1080acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1081acdf5bf4SSatish Balay } 1082acdf5bf4SSatish Balay if (idx) { 1083acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1084acdf5bf4SSatish Balay if (imark > -1) { 1085acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1086bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1087acdf5bf4SSatish Balay } 1088acdf5bf4SSatish Balay } else { 1089acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1090d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1091d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1092acdf5bf4SSatish Balay else break; 1093acdf5bf4SSatish Balay } 1094acdf5bf4SSatish Balay imark = i; 1095acdf5bf4SSatish Balay } 1096d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1097d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1098acdf5bf4SSatish Balay } 1099d64ed03dSBarry Smith } else { 1100d212a18eSSatish Balay if (idx) *idx = 0; 1101d212a18eSSatish Balay if (v) *v = 0; 1102d212a18eSSatish Balay } 1103acdf5bf4SSatish Balay } 1104acdf5bf4SSatish Balay *nz = nztot; 1105acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1106acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 11073a40ed3dSBarry Smith PetscFunctionReturn(0); 1108acdf5bf4SSatish Balay } 1109acdf5bf4SSatish Balay 11105615d1e5SSatish Balay #undef __FUNC__ 11115615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1112acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1113acdf5bf4SSatish Balay { 1114acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1115d64ed03dSBarry Smith 1116d64ed03dSBarry Smith PetscFunctionBegin; 1117acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1118e3372554SBarry Smith SETERRQ(1,0,"MatGetRow not called"); 1119acdf5bf4SSatish Balay } 1120acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 11213a40ed3dSBarry Smith PetscFunctionReturn(0); 1122acdf5bf4SSatish Balay } 1123acdf5bf4SSatish Balay 11245615d1e5SSatish Balay #undef __FUNC__ 11255615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1126ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 11275a838052SSatish Balay { 11285a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1129d64ed03dSBarry Smith 1130d64ed03dSBarry Smith PetscFunctionBegin; 11315a838052SSatish Balay *bs = baij->bs; 11323a40ed3dSBarry Smith PetscFunctionReturn(0); 11335a838052SSatish Balay } 11345a838052SSatish Balay 11355615d1e5SSatish Balay #undef __FUNC__ 11365615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1137ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 113858667388SSatish Balay { 113958667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 114058667388SSatish Balay int ierr; 1141d64ed03dSBarry Smith 1142d64ed03dSBarry Smith PetscFunctionBegin; 114358667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 114458667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 11453a40ed3dSBarry Smith PetscFunctionReturn(0); 114658667388SSatish Balay } 11470ac07820SSatish Balay 11485615d1e5SSatish Balay #undef __FUNC__ 11495615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1150ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 11510ac07820SSatish Balay { 11524e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 11534e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 11547d57db60SLois Curfman McInnes int ierr; 11557d57db60SLois Curfman McInnes double isend[5], irecv[5]; 11560ac07820SSatish Balay 1157d64ed03dSBarry Smith PetscFunctionBegin; 11584e220ebcSLois Curfman McInnes info->rows_global = (double)a->M; 11594e220ebcSLois Curfman McInnes info->columns_global = (double)a->N; 11604e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 11614e220ebcSLois Curfman McInnes info->columns_local = (double)a->N; 11624e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 11634e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 11644e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 11654e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 11664e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 11670ac07820SSatish Balay if (flag == MAT_LOCAL) { 11684e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 11694e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 11704e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11714e220ebcSLois Curfman McInnes info->memory = isend[3]; 11724e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 11730ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1174ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 11754e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11764e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11774e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11784e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11794e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11800ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1181ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 11824e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11834e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11844e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11854e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11864e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11870ac07820SSatish Balay } 11884e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 11894e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 11904e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 11913a40ed3dSBarry Smith PetscFunctionReturn(0); 11920ac07820SSatish Balay } 11930ac07820SSatish Balay 11945615d1e5SSatish Balay #undef __FUNC__ 11955615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1196ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 119758667388SSatish Balay { 119858667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 119958667388SSatish Balay 1200d64ed03dSBarry Smith PetscFunctionBegin; 120158667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 120258667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 12036da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1204c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 120596854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1206c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1207b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1208b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1209b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1210aeafbbfcSLois Curfman McInnes a->roworiented = 1; 121158667388SSatish Balay MatSetOption(a->A,op); 121258667388SSatish Balay MatSetOption(a->B,op); 1213b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 12146da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 121558667388SSatish Balay op == MAT_SYMMETRIC || 121658667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 121758667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 121858667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 121958667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 122058667388SSatish Balay a->roworiented = 0; 122158667388SSatish Balay MatSetOption(a->A,op); 122258667388SSatish Balay MatSetOption(a->B,op); 12232b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 122490f02eecSBarry Smith a->donotstash = 1; 1225d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1226d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1227d64ed03dSBarry Smith } else { 1228d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1229d64ed03dSBarry Smith } 12303a40ed3dSBarry Smith PetscFunctionReturn(0); 123158667388SSatish Balay } 123258667388SSatish Balay 12335615d1e5SSatish Balay #undef __FUNC__ 12345615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1235ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 12360ac07820SSatish Balay { 12370ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 12380ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 12390ac07820SSatish Balay Mat B; 12400ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 12410ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 12420ac07820SSatish Balay Scalar *a; 12430ac07820SSatish Balay 1244d64ed03dSBarry Smith PetscFunctionBegin; 1245d64ed03dSBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(1,0,"Square matrix only for in-place"); 12460ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 12470ac07820SSatish Balay CHKERRQ(ierr); 12480ac07820SSatish Balay 12490ac07820SSatish Balay /* copy over the A part */ 12500ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 12510ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12520ac07820SSatish Balay row = baij->rstart; 12530ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 12540ac07820SSatish Balay 12550ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12560ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12570ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12580ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12590ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 12600ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12610ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12620ac07820SSatish Balay col++; a += bs; 12630ac07820SSatish Balay } 12640ac07820SSatish Balay } 12650ac07820SSatish Balay } 12660ac07820SSatish Balay /* copy over the B part */ 12670ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 12680ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12690ac07820SSatish Balay row = baij->rstart*bs; 12700ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12710ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12720ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12730ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12740ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 12750ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12760ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12770ac07820SSatish Balay col++; a += bs; 12780ac07820SSatish Balay } 12790ac07820SSatish Balay } 12800ac07820SSatish Balay } 12810ac07820SSatish Balay PetscFree(rvals); 12820ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12830ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12840ac07820SSatish Balay 12850ac07820SSatish Balay if (matout != PETSC_NULL) { 12860ac07820SSatish Balay *matout = B; 12870ac07820SSatish Balay } else { 12880ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 12890ac07820SSatish Balay PetscFree(baij->rowners); 12900ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 12910ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 12920ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 12930ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 12940ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 12950ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 12960ac07820SSatish Balay PetscFree(baij); 1297f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 12980ac07820SSatish Balay PetscHeaderDestroy(B); 12990ac07820SSatish Balay } 13003a40ed3dSBarry Smith PetscFunctionReturn(0); 13010ac07820SSatish Balay } 13020e95ebc0SSatish Balay 13035615d1e5SSatish Balay #undef __FUNC__ 13045615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 13050e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 13060e95ebc0SSatish Balay { 13070e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 13080e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 13090e95ebc0SSatish Balay int ierr,s1,s2,s3; 13100e95ebc0SSatish Balay 1311d64ed03dSBarry Smith PetscFunctionBegin; 13120e95ebc0SSatish Balay if (ll) { 13130e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 13140e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1315e3372554SBarry Smith if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes"); 13160e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 13170e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 13180e95ebc0SSatish Balay } 1319e3372554SBarry Smith if (rr) SETERRQ(1,0,"not supported for right vector"); 13203a40ed3dSBarry Smith PetscFunctionReturn(0); 13210e95ebc0SSatish Balay } 13220e95ebc0SSatish Balay 13230ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the 13240ac07820SSatish Balay matrix is square and the column and row owerships are identical. 13250ac07820SSatish Balay This is a BUG. The only way to fix it seems to be to access 13260ac07820SSatish Balay baij->A and baij->B directly and not through the MatZeroRows() 13270ac07820SSatish Balay routine. 13280ac07820SSatish Balay */ 13295615d1e5SSatish Balay #undef __FUNC__ 13305615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1331ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 13320ac07820SSatish Balay { 13330ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 13340ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 13350ac07820SSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work; 13360ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 13370ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 13380ac07820SSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs; 13390ac07820SSatish Balay MPI_Comm comm = A->comm; 13400ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 13410ac07820SSatish Balay MPI_Status recv_status,*send_status; 13420ac07820SSatish Balay IS istmp; 13430ac07820SSatish Balay 1344d64ed03dSBarry Smith PetscFunctionBegin; 13450ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 13460ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 13470ac07820SSatish Balay 13480ac07820SSatish Balay /* first count number of contributors to each processor */ 13490ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 13500ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 13510ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 13520ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13530ac07820SSatish Balay idx = rows[i]; 13540ac07820SSatish Balay found = 0; 13550ac07820SSatish Balay for ( j=0; j<size; j++ ) { 13560ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 13570ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 13580ac07820SSatish Balay } 13590ac07820SSatish Balay } 1360e3372554SBarry Smith if (!found) SETERRQ(1,0,"Index out of range"); 13610ac07820SSatish Balay } 13620ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 13630ac07820SSatish Balay 13640ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 13650ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1366ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 13670ac07820SSatish Balay nrecvs = work[rank]; 1368ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 13690ac07820SSatish Balay nmax = work[rank]; 13700ac07820SSatish Balay PetscFree(work); 13710ac07820SSatish Balay 13720ac07820SSatish Balay /* post receives: */ 1373d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1374d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 13750ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1376ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 13770ac07820SSatish Balay } 13780ac07820SSatish Balay 13790ac07820SSatish Balay /* do sends: 13800ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 13810ac07820SSatish Balay the ith processor 13820ac07820SSatish Balay */ 13830ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1384ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 13850ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 13860ac07820SSatish Balay starts[0] = 0; 13870ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13880ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13890ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 13900ac07820SSatish Balay } 13910ac07820SSatish Balay ISRestoreIndices(is,&rows); 13920ac07820SSatish Balay 13930ac07820SSatish Balay starts[0] = 0; 13940ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13950ac07820SSatish Balay count = 0; 13960ac07820SSatish Balay for ( i=0; i<size; i++ ) { 13970ac07820SSatish Balay if (procs[i]) { 1398ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 13990ac07820SSatish Balay } 14000ac07820SSatish Balay } 14010ac07820SSatish Balay PetscFree(starts); 14020ac07820SSatish Balay 14030ac07820SSatish Balay base = owners[rank]*bs; 14040ac07820SSatish Balay 14050ac07820SSatish Balay /* wait on receives */ 14060ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 14070ac07820SSatish Balay source = lens + nrecvs; 14080ac07820SSatish Balay count = nrecvs; slen = 0; 14090ac07820SSatish Balay while (count) { 1410ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 14110ac07820SSatish Balay /* unpack receives into our local space */ 1412ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 14130ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 14140ac07820SSatish Balay lens[imdex] = n; 14150ac07820SSatish Balay slen += n; 14160ac07820SSatish Balay count--; 14170ac07820SSatish Balay } 14180ac07820SSatish Balay PetscFree(recv_waits); 14190ac07820SSatish Balay 14200ac07820SSatish Balay /* move the data into the send scatter */ 14210ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 14220ac07820SSatish Balay count = 0; 14230ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 14240ac07820SSatish Balay values = rvalues + i*nmax; 14250ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 14260ac07820SSatish Balay lrows[count++] = values[j] - base; 14270ac07820SSatish Balay } 14280ac07820SSatish Balay } 14290ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 14300ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 14310ac07820SSatish Balay 14320ac07820SSatish Balay /* actually zap the local rows */ 1433029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 14340ac07820SSatish Balay PLogObjectParent(A,istmp); 14350ac07820SSatish Balay PetscFree(lrows); 14360ac07820SSatish Balay ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 14370ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 14380ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 14390ac07820SSatish Balay 14400ac07820SSatish Balay /* wait on sends */ 14410ac07820SSatish Balay if (nsends) { 1442d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1443ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 14440ac07820SSatish Balay PetscFree(send_status); 14450ac07820SSatish Balay } 14460ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 14470ac07820SSatish Balay 14483a40ed3dSBarry Smith PetscFunctionReturn(0); 14490ac07820SSatish Balay } 1450ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 14515615d1e5SSatish Balay #undef __FUNC__ 14525615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1453ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1454ba4ca20aSSatish Balay { 1455ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 14563a40ed3dSBarry Smith int ierr; 1457ba4ca20aSSatish Balay 1458d64ed03dSBarry Smith PetscFunctionBegin; 14593a40ed3dSBarry Smith if (!a->rank) { 14603a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 14613a40ed3dSBarry Smith } 14623a40ed3dSBarry Smith PetscFunctionReturn(0); 1463ba4ca20aSSatish Balay } 14640ac07820SSatish Balay 14655615d1e5SSatish Balay #undef __FUNC__ 14665615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1467ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1468bb5a7306SBarry Smith { 1469bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1470bb5a7306SBarry Smith int ierr; 1471d64ed03dSBarry Smith 1472d64ed03dSBarry Smith PetscFunctionBegin; 1473bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 14743a40ed3dSBarry Smith PetscFunctionReturn(0); 1475bb5a7306SBarry Smith } 1476bb5a7306SBarry Smith 14770ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 14780ac07820SSatish Balay 147979bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 148079bdfe76SSatish Balay static struct _MatOps MatOps = { 1481bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 14824c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 14834c50302cSBarry Smith 0,0,0,0, 14840ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 14850e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 148658667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 14874c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 14884c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 14894c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 149094a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1491d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1492ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1493bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1494ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 149579bdfe76SSatish Balay 149679bdfe76SSatish Balay 14975615d1e5SSatish Balay #undef __FUNC__ 14985615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 149979bdfe76SSatish Balay /*@C 150079bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 150179bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 150279bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 150379bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 150479bdfe76SSatish Balay performance can be increased by more than a factor of 50. 150579bdfe76SSatish Balay 150679bdfe76SSatish Balay Input Parameters: 150779bdfe76SSatish Balay . comm - MPI communicator 150879bdfe76SSatish Balay . bs - size of blockk 150979bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 151092e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 151192e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 151292e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 151392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 151492e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 151579bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 151692e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 151779bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 151879bdfe76SSatish Balay submatrix (same for all local rows) 151992e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 152092e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 152192e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 152292e8d321SLois Curfman McInnes it is zero. 152392e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 152479bdfe76SSatish Balay submatrix (same for all local rows). 152592e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 152692e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 152792e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 152879bdfe76SSatish Balay 152979bdfe76SSatish Balay Output Parameter: 153079bdfe76SSatish Balay . A - the matrix 153179bdfe76SSatish Balay 153279bdfe76SSatish Balay Notes: 153379bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 153479bdfe76SSatish Balay (possibly both). 153579bdfe76SSatish Balay 153679bdfe76SSatish Balay Storage Information: 153779bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 153879bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 153979bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 154079bdfe76SSatish Balay local matrix (a rectangular submatrix). 154179bdfe76SSatish Balay 154279bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 154379bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 154479bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 154579bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 154679bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 154779bdfe76SSatish Balay 154879bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 154979bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 155079bdfe76SSatish Balay 155179bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 155279bdfe76SSatish Balay $ ------------------- 155379bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 155479bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 155579bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 155679bdfe76SSatish Balay $ ------------------- 155779bdfe76SSatish Balay $ 155879bdfe76SSatish Balay 155979bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 156079bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 156179bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 156257b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 156379bdfe76SSatish Balay 1564d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1565d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 156679bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 156792e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 156892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15696da5968aSLois Curfman McInnes matrices. 157079bdfe76SSatish Balay 157192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 157279bdfe76SSatish Balay 157379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 157479bdfe76SSatish Balay @*/ 157579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 157679bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 157779bdfe76SSatish Balay { 157879bdfe76SSatish Balay Mat B; 157979bdfe76SSatish Balay Mat_MPIBAIJ *b; 15804c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 158179bdfe76SSatish Balay 1582d64ed03dSBarry Smith PetscFunctionBegin; 15833914022bSBarry Smith if (bs < 1) SETERRQ(1,0,"Invalid block size specified, must be positive"); 15843914022bSBarry Smith 15853914022bSBarry Smith MPI_Comm_size(comm,&size); 15863914022bSBarry Smith if (size == 1) { 15873914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 15883914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 15893914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 15903a40ed3dSBarry Smith PetscFunctionReturn(0); 15913914022bSBarry Smith } 15923914022bSBarry Smith 159379bdfe76SSatish Balay *A = 0; 1594d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 159579bdfe76SSatish Balay PLogObjectCreate(B); 159679bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 159779bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 159879bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 15994c50302cSBarry Smith 160079bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 160179bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 160290f02eecSBarry Smith B->mapping = 0; 160379bdfe76SSatish Balay B->factor = 0; 160479bdfe76SSatish Balay B->assembled = PETSC_FALSE; 160579bdfe76SSatish Balay 1606e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 160779bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 160879bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 160979bdfe76SSatish Balay 1610d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1611e3372554SBarry Smith SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1612d64ed03dSBarry Smith } 1613e3372554SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified"); 1614d64ed03dSBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) SETERRQ(1,0,"either N or n should be specified"); 1615cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1616cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 161779bdfe76SSatish Balay 161879bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 161979bdfe76SSatish Balay work[0] = m; work[1] = n; 162079bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 1621ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 162279bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 162379bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 162479bdfe76SSatish Balay } 162579bdfe76SSatish Balay if (m == PETSC_DECIDE) { 162679bdfe76SSatish Balay Mbs = M/bs; 1627e3372554SBarry Smith if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize"); 162879bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 162979bdfe76SSatish Balay m = mbs*bs; 163079bdfe76SSatish Balay } 163179bdfe76SSatish Balay if (n == PETSC_DECIDE) { 163279bdfe76SSatish Balay Nbs = N/bs; 1633e3372554SBarry Smith if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize"); 163479bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 163579bdfe76SSatish Balay n = nbs*bs; 163679bdfe76SSatish Balay } 1637e3372554SBarry Smith if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize"); 163879bdfe76SSatish Balay 163979bdfe76SSatish Balay b->m = m; B->m = m; 164079bdfe76SSatish Balay b->n = n; B->n = n; 164179bdfe76SSatish Balay b->N = N; B->N = N; 164279bdfe76SSatish Balay b->M = M; B->M = M; 164379bdfe76SSatish Balay b->bs = bs; 164479bdfe76SSatish Balay b->bs2 = bs*bs; 164579bdfe76SSatish Balay b->mbs = mbs; 164679bdfe76SSatish Balay b->nbs = nbs; 164779bdfe76SSatish Balay b->Mbs = Mbs; 164879bdfe76SSatish Balay b->Nbs = Nbs; 164979bdfe76SSatish Balay 165079bdfe76SSatish Balay /* build local table of row and column ownerships */ 165179bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1652f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 16530ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 1654ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 165579bdfe76SSatish Balay b->rowners[0] = 0; 165679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 165779bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 165879bdfe76SSatish Balay } 165979bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 166079bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 16614fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 16624fa0d573SSatish Balay b->rend_bs = b->rend * bs; 16634fa0d573SSatish Balay 1664ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 166579bdfe76SSatish Balay b->cowners[0] = 0; 166679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 166779bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 166879bdfe76SSatish Balay } 166979bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 167079bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 16714fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 16724fa0d573SSatish Balay b->cend_bs = b->cend * bs; 167379bdfe76SSatish Balay 167479bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1675029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 167679bdfe76SSatish Balay PLogObjectParent(B,b->A); 167779bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1678029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 167979bdfe76SSatish Balay PLogObjectParent(B,b->B); 168079bdfe76SSatish Balay 168179bdfe76SSatish Balay /* build cache for off array entries formed */ 168279bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 168390f02eecSBarry Smith b->donotstash = 0; 168479bdfe76SSatish Balay b->colmap = 0; 168579bdfe76SSatish Balay b->garray = 0; 168679bdfe76SSatish Balay b->roworiented = 1; 168779bdfe76SSatish Balay 168830793edcSSatish Balay /* stuff used in block assembly */ 168930793edcSSatish Balay b->barray = 0; 169030793edcSSatish Balay 169179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 169279bdfe76SSatish Balay b->lvec = 0; 169379bdfe76SSatish Balay b->Mvctx = 0; 169479bdfe76SSatish Balay 169579bdfe76SSatish Balay /* stuff for MatGetRow() */ 169679bdfe76SSatish Balay b->rowindices = 0; 169779bdfe76SSatish Balay b->rowvalues = 0; 169879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 169979bdfe76SSatish Balay 170079bdfe76SSatish Balay *A = B; 17013a40ed3dSBarry Smith PetscFunctionReturn(0); 170279bdfe76SSatish Balay } 1703026e39d0SSatish Balay 17045615d1e5SSatish Balay #undef __FUNC__ 17055615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 17060ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 17070ac07820SSatish Balay { 17080ac07820SSatish Balay Mat mat; 17090ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 17100ac07820SSatish Balay int ierr, len=0, flg; 17110ac07820SSatish Balay 1712d64ed03dSBarry Smith PetscFunctionBegin; 17130ac07820SSatish Balay *newmat = 0; 1714d4bb536fSBarry Smith PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 17150ac07820SSatish Balay PLogObjectCreate(mat); 17160ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 17170ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 17180ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 17190ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 17200ac07820SSatish Balay mat->factor = matin->factor; 17210ac07820SSatish Balay mat->assembled = PETSC_TRUE; 17220ac07820SSatish Balay 17230ac07820SSatish Balay a->m = mat->m = oldmat->m; 17240ac07820SSatish Balay a->n = mat->n = oldmat->n; 17250ac07820SSatish Balay a->M = mat->M = oldmat->M; 17260ac07820SSatish Balay a->N = mat->N = oldmat->N; 17270ac07820SSatish Balay 17280ac07820SSatish Balay a->bs = oldmat->bs; 17290ac07820SSatish Balay a->bs2 = oldmat->bs2; 17300ac07820SSatish Balay a->mbs = oldmat->mbs; 17310ac07820SSatish Balay a->nbs = oldmat->nbs; 17320ac07820SSatish Balay a->Mbs = oldmat->Mbs; 17330ac07820SSatish Balay a->Nbs = oldmat->Nbs; 17340ac07820SSatish Balay 17350ac07820SSatish Balay a->rstart = oldmat->rstart; 17360ac07820SSatish Balay a->rend = oldmat->rend; 17370ac07820SSatish Balay a->cstart = oldmat->cstart; 17380ac07820SSatish Balay a->cend = oldmat->cend; 17390ac07820SSatish Balay a->size = oldmat->size; 17400ac07820SSatish Balay a->rank = oldmat->rank; 1741e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 17420ac07820SSatish Balay a->rowvalues = 0; 17430ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 174430793edcSSatish Balay a->barray = 0; 17450ac07820SSatish Balay 17460ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1747f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 17480ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 17490ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 17500ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 17510ac07820SSatish Balay if (oldmat->colmap) { 17520ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 17530ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 17540ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 17550ac07820SSatish Balay } else a->colmap = 0; 17560ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 17570ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 17580ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 17590ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 17600ac07820SSatish Balay } else a->garray = 0; 17610ac07820SSatish Balay 17620ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 17630ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 17640ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 17650ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 17660ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 17670ac07820SSatish Balay PLogObjectParent(mat,a->A); 17680ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 17690ac07820SSatish Balay PLogObjectParent(mat,a->B); 17700ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 17710ac07820SSatish Balay if (flg) { 17720ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 17730ac07820SSatish Balay } 17740ac07820SSatish Balay *newmat = mat; 17753a40ed3dSBarry Smith PetscFunctionReturn(0); 17760ac07820SSatish Balay } 177757b952d6SSatish Balay 177857b952d6SSatish Balay #include "sys.h" 177957b952d6SSatish Balay 17805615d1e5SSatish Balay #undef __FUNC__ 17815615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 178257b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 178357b952d6SSatish Balay { 178457b952d6SSatish Balay Mat A; 178557b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 178657b952d6SSatish Balay Scalar *vals,*buf; 178757b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 178857b952d6SSatish Balay MPI_Status status; 1789cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 179057b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 179157b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 179257b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 179357b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 179457b952d6SSatish Balay 1795d64ed03dSBarry Smith PetscFunctionBegin; 179657b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 179757b952d6SSatish Balay bs2 = bs*bs; 179857b952d6SSatish Balay 179957b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 180057b952d6SSatish Balay if (!rank) { 180157b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1802e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1803e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1804d64ed03dSBarry Smith if (header[3] < 0) { 1805d64ed03dSBarry Smith SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 1806d64ed03dSBarry Smith } 18076c5fab8fSBarry Smith } 1808d64ed03dSBarry Smith 1809ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 181057b952d6SSatish Balay M = header[1]; N = header[2]; 181157b952d6SSatish Balay 1812e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 181357b952d6SSatish Balay 181457b952d6SSatish Balay /* 181557b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 181657b952d6SSatish Balay divisible by the blocksize 181757b952d6SSatish Balay */ 181857b952d6SSatish Balay Mbs = M/bs; 181957b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 182057b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 182157b952d6SSatish Balay else Mbs++; 182257b952d6SSatish Balay if (extra_rows &&!rank) { 1823b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 182457b952d6SSatish Balay } 1825537820f0SBarry Smith 182657b952d6SSatish Balay /* determine ownership of all rows */ 182757b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 182857b952d6SSatish Balay m = mbs * bs; 1829cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1830cee3aa6bSSatish Balay browners = rowners + size + 1; 1831ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 183257b952d6SSatish Balay rowners[0] = 0; 1833cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1834cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 183557b952d6SSatish Balay rstart = rowners[rank]; 183657b952d6SSatish Balay rend = rowners[rank+1]; 183757b952d6SSatish Balay 183857b952d6SSatish Balay /* distribute row lengths to all processors */ 183957b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 184057b952d6SSatish Balay if (!rank) { 184157b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 1842e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 184357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 184457b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1845cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1846ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 184757b952d6SSatish Balay PetscFree(sndcounts); 1848d64ed03dSBarry Smith } else { 1849ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 185057b952d6SSatish Balay } 185157b952d6SSatish Balay 185257b952d6SSatish Balay if (!rank) { 185357b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 185457b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 185557b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 185657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 185757b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 185857b952d6SSatish Balay procsnz[i] += rowlengths[j]; 185957b952d6SSatish Balay } 186057b952d6SSatish Balay } 186157b952d6SSatish Balay PetscFree(rowlengths); 186257b952d6SSatish Balay 186357b952d6SSatish Balay /* determine max buffer needed and allocate it */ 186457b952d6SSatish Balay maxnz = 0; 186557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 186657b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 186757b952d6SSatish Balay } 186857b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 186957b952d6SSatish Balay 187057b952d6SSatish Balay /* read in my part of the matrix column indices */ 187157b952d6SSatish Balay nz = procsnz[0]; 187257b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 187357b952d6SSatish Balay mycols = ibuf; 1874cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 1875e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1876cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1877cee3aa6bSSatish Balay 187857b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 187957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 188057b952d6SSatish Balay nz = procsnz[i]; 1881e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1882ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 188357b952d6SSatish Balay } 188457b952d6SSatish Balay /* read in the stuff for the last proc */ 188557b952d6SSatish Balay if ( size != 1 ) { 188657b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 1887e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 188857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1889ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 189057b952d6SSatish Balay } 189157b952d6SSatish Balay PetscFree(cols); 1892d64ed03dSBarry Smith } else { 189357b952d6SSatish Balay /* determine buffer space needed for message */ 189457b952d6SSatish Balay nz = 0; 189557b952d6SSatish Balay for ( i=0; i<m; i++ ) { 189657b952d6SSatish Balay nz += locrowlens[i]; 189757b952d6SSatish Balay } 189857b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 189957b952d6SSatish Balay mycols = ibuf; 190057b952d6SSatish Balay /* receive message of column indices*/ 1901ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1902ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1903e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 190457b952d6SSatish Balay } 190557b952d6SSatish Balay 190657b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1907cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1908cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 190957b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1910cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 191157b952d6SSatish Balay masked1 = mask + Mbs; 191257b952d6SSatish Balay masked2 = masked1 + Mbs; 191357b952d6SSatish Balay rowcount = 0; nzcount = 0; 191457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 191557b952d6SSatish Balay dcount = 0; 191657b952d6SSatish Balay odcount = 0; 191757b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 191857b952d6SSatish Balay kmax = locrowlens[rowcount]; 191957b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 192057b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 192157b952d6SSatish Balay if (!mask[tmp]) { 192257b952d6SSatish Balay mask[tmp] = 1; 192357b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 192457b952d6SSatish Balay else masked1[dcount++] = tmp; 192557b952d6SSatish Balay } 192657b952d6SSatish Balay } 192757b952d6SSatish Balay rowcount++; 192857b952d6SSatish Balay } 1929cee3aa6bSSatish Balay 193057b952d6SSatish Balay dlens[i] = dcount; 193157b952d6SSatish Balay odlens[i] = odcount; 1932cee3aa6bSSatish Balay 193357b952d6SSatish Balay /* zero out the mask elements we set */ 193457b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 193557b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 193657b952d6SSatish Balay } 1937cee3aa6bSSatish Balay 193857b952d6SSatish Balay /* create our matrix */ 1939537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1940537820f0SBarry Smith CHKERRQ(ierr); 194157b952d6SSatish Balay A = *newmat; 19426d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 194357b952d6SSatish Balay 194457b952d6SSatish Balay if (!rank) { 194557b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 194657b952d6SSatish Balay /* read in my part of the matrix numerical values */ 194757b952d6SSatish Balay nz = procsnz[0]; 194857b952d6SSatish Balay vals = buf; 1949cee3aa6bSSatish Balay mycols = ibuf; 1950cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 1951e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1952cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1953537820f0SBarry Smith 195457b952d6SSatish Balay /* insert into matrix */ 195557b952d6SSatish Balay jj = rstart*bs; 195657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 195757b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 195857b952d6SSatish Balay mycols += locrowlens[i]; 195957b952d6SSatish Balay vals += locrowlens[i]; 196057b952d6SSatish Balay jj++; 196157b952d6SSatish Balay } 196257b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 196357b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 196457b952d6SSatish Balay nz = procsnz[i]; 196557b952d6SSatish Balay vals = buf; 1966e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1967ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 196857b952d6SSatish Balay } 196957b952d6SSatish Balay /* the last proc */ 197057b952d6SSatish Balay if ( size != 1 ){ 197157b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1972cee3aa6bSSatish Balay vals = buf; 1973e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 197457b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1975ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 197657b952d6SSatish Balay } 197757b952d6SSatish Balay PetscFree(procsnz); 1978d64ed03dSBarry Smith } else { 197957b952d6SSatish Balay /* receive numeric values */ 198057b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 198157b952d6SSatish Balay 198257b952d6SSatish Balay /* receive message of values*/ 198357b952d6SSatish Balay vals = buf; 1984cee3aa6bSSatish Balay mycols = ibuf; 1985ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1986ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1987e3372554SBarry Smith if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 198857b952d6SSatish Balay 198957b952d6SSatish Balay /* insert into matrix */ 199057b952d6SSatish Balay jj = rstart*bs; 1991cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 199257b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 199357b952d6SSatish Balay mycols += locrowlens[i]; 199457b952d6SSatish Balay vals += locrowlens[i]; 199557b952d6SSatish Balay jj++; 199657b952d6SSatish Balay } 199757b952d6SSatish Balay } 199857b952d6SSatish Balay PetscFree(locrowlens); 199957b952d6SSatish Balay PetscFree(buf); 200057b952d6SSatish Balay PetscFree(ibuf); 200157b952d6SSatish Balay PetscFree(rowners); 200257b952d6SSatish Balay PetscFree(dlens); 2003cee3aa6bSSatish Balay PetscFree(mask); 20046d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 20056d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 20063a40ed3dSBarry Smith PetscFunctionReturn(0); 200757b952d6SSatish Balay } 200857b952d6SSatish Balay 200957b952d6SSatish Balay 2010