1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*b9e4cc15SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.98 1998/01/12 21:15:42 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 143b2fbd54SBarry Smith 15537820f0SBarry Smith /* 16537820f0SBarry Smith Local utility routine that creates a mapping from the global column 1757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1957b952d6SSatish Balay length of colmap equals the global matrix length. 2057b952d6SSatish Balay */ 215615d1e5SSatish Balay #undef __FUNC__ 225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2457b952d6SSatish Balay { 2557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 2857b952d6SSatish Balay 29d64ed03dSBarry Smith PetscFunctionBegin; 30758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3157b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3257b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 343a40ed3dSBarry Smith PetscFunctionReturn(0); 3557b952d6SSatish Balay } 3657b952d6SSatish Balay 3780c1aa95SSatish Balay #define CHUNKSIZE 10 3880c1aa95SSatish Balay 39f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 4080c1aa95SSatish Balay { \ 4180c1aa95SSatish Balay \ 4280c1aa95SSatish Balay brow = row/bs; \ 4380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 4580c1aa95SSatish Balay bcol = col/bs; \ 4680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 47ab26458aSBarry Smith low = 0; high = nrow; \ 48ab26458aSBarry Smith while (high-low > 3) { \ 49ab26458aSBarry Smith t = (low+high)/2; \ 50ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 51ab26458aSBarry Smith else low = t; \ 52ab26458aSBarry Smith } \ 53ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 5480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 5580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 5680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 57eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 58eada6651SSatish Balay else *bap = value; \ 59ac7a638eSSatish Balay goto a_noinsert; \ 6080c1aa95SSatish Balay } \ 6180c1aa95SSatish Balay } \ 6289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 63a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 6480c1aa95SSatish Balay if (nrow >= rmax) { \ 6580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 6680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 6780c1aa95SSatish Balay Scalar *new_a; \ 6880c1aa95SSatish Balay \ 69a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7089280ab3SLois Curfman McInnes \ 7180c1aa95SSatish Balay /* malloc new storage space */ \ 7280c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 7380c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 7480c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 7580c1aa95SSatish Balay new_i = new_j + new_nz; \ 7680c1aa95SSatish Balay \ 7780c1aa95SSatish Balay /* copy over old data into new slots */ \ 7880c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 7980c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 8080c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 8180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 8280c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 8380c1aa95SSatish Balay len*sizeof(int)); \ 8480c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 8580c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 8680c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 8780c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 8880c1aa95SSatish Balay /* free up old matrix storage */ \ 8980c1aa95SSatish Balay PetscFree(a->a); \ 9080c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 9180c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 9280c1aa95SSatish Balay a->singlemalloc = 1; \ 9380c1aa95SSatish Balay \ 9480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 9680c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 9780c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 9880c1aa95SSatish Balay a->reallocs++; \ 9980c1aa95SSatish Balay a->nz++; \ 10080c1aa95SSatish Balay } \ 10180c1aa95SSatish Balay N = nrow++ - 1; \ 10280c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 10380c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 10480c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 10580c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 10680c1aa95SSatish Balay } \ 10780c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 10880c1aa95SSatish Balay rp[_i] = bcol; \ 10980c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 110ac7a638eSSatish Balay a_noinsert:; \ 11180c1aa95SSatish Balay ailen[brow] = nrow; \ 11280c1aa95SSatish Balay } 11357b952d6SSatish Balay 114ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115ac7a638eSSatish Balay { \ 116ac7a638eSSatish Balay \ 117ac7a638eSSatish Balay brow = row/bs; \ 118ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 120ac7a638eSSatish Balay bcol = col/bs; \ 121ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 122ac7a638eSSatish Balay low = 0; high = nrow; \ 123ac7a638eSSatish Balay while (high-low > 3) { \ 124ac7a638eSSatish Balay t = (low+high)/2; \ 125ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 126ac7a638eSSatish Balay else low = t; \ 127ac7a638eSSatish Balay } \ 128ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 129ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 130ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 131ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 132ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 133ac7a638eSSatish Balay else *bap = value; \ 134ac7a638eSSatish Balay goto b_noinsert; \ 135ac7a638eSSatish Balay } \ 136ac7a638eSSatish Balay } \ 13789280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 138a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139ac7a638eSSatish Balay if (nrow >= rmax) { \ 140ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14174c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142ac7a638eSSatish Balay Scalar *new_a; \ 143ac7a638eSSatish Balay \ 144a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 14589280ab3SLois Curfman McInnes \ 146ac7a638eSSatish Balay /* malloc new storage space */ \ 14774c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 150ac7a638eSSatish Balay new_i = new_j + new_nz; \ 151ac7a638eSSatish Balay \ 152ac7a638eSSatish Balay /* copy over old data into new slots */ \ 153ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 15474c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158ac7a638eSSatish Balay len*sizeof(int)); \ 159ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163ac7a638eSSatish Balay /* free up old matrix storage */ \ 16474c639caSSatish Balay PetscFree(b->a); \ 16574c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 16774c639caSSatish Balay b->singlemalloc = 1; \ 168ac7a638eSSatish Balay \ 169ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 17174c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 17274c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 17374c639caSSatish Balay b->reallocs++; \ 17474c639caSSatish Balay b->nz++; \ 175ac7a638eSSatish Balay } \ 176ac7a638eSSatish Balay N = nrow++ - 1; \ 177ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 178ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 179ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 180ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183ac7a638eSSatish Balay rp[_i] = bcol; \ 184ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 185ac7a638eSSatish Balay b_noinsert:; \ 186ac7a638eSSatish Balay bilen[brow] = nrow; \ 187ac7a638eSSatish Balay } 188ac7a638eSSatish Balay 1895615d1e5SSatish Balay #undef __FUNC__ 1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 19257b952d6SSatish Balay { 19357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 19457b952d6SSatish Balay Scalar value; 1954fa0d573SSatish Balay int ierr,i,j,row,col; 1964fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1974fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1984fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 19957b952d6SSatish Balay 200eada6651SSatish Balay /* Some Variables required in the macro */ 20180c1aa95SSatish Balay Mat A = baij->A; 20280c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204ac7a638eSSatish Balay Scalar *aa=a->a; 205ac7a638eSSatish Balay 206ac7a638eSSatish Balay Mat B = baij->B; 207ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209ac7a638eSSatish Balay Scalar *ba=b->a; 210ac7a638eSSatish Balay 211ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 213ac7a638eSSatish Balay Scalar *ap,*bap; 21480c1aa95SSatish Balay 215d64ed03dSBarry Smith PetscFunctionBegin; 21657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 218a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220639f9d9dSBarry Smith #endif 22157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 22257b952d6SSatish Balay row = im[i] - rstart_orig; 22357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 22457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 22557b952d6SSatish Balay col = in[j] - cstart_orig; 22657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 22880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 22957b952d6SSatish Balay } 2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 231a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233639f9d9dSBarry Smith #endif 23457b952d6SSatish Balay else { 23557b952d6SSatish Balay if (mat->was_assembled) { 236905e6a2fSBarry Smith if (!baij->colmap) { 237905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238905e6a2fSBarry Smith } 239905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 24057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 24157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 24257b952d6SSatish Balay col = in[j]; 2439bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2449bf004c3SSatish Balay B = baij->B; 2459bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2469bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2479bf004c3SSatish Balay ba=b->a; 24857b952d6SSatish Balay } 249d64ed03dSBarry Smith } else col = in[j]; 25057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25357b952d6SSatish Balay } 25457b952d6SSatish Balay } 255d64ed03dSBarry Smith } else { 25690f02eecSBarry Smith if (roworiented && !baij->donotstash) { 25757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258d64ed03dSBarry Smith } else { 25990f02eecSBarry Smith if (!baij->donotstash) { 26057b952d6SSatish Balay row = im[i]; 26157b952d6SSatish Balay for ( j=0; j<n; j++ ) { 26257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 26357b952d6SSatish Balay } 26457b952d6SSatish Balay } 26557b952d6SSatish Balay } 26657b952d6SSatish Balay } 26790f02eecSBarry Smith } 2683a40ed3dSBarry Smith PetscFunctionReturn(0); 26957b952d6SSatish Balay } 27057b952d6SSatish Balay 271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272ab26458aSBarry Smith #undef __FUNC__ 273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275ab26458aSBarry Smith { 276ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27730793edcSSatish Balay Scalar *value,*barray=baij->barray; 278abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 279ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 280ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 281ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282ab26458aSBarry Smith 28330793edcSSatish Balay if(!barray) { 28447513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 28530793edcSSatish Balay } 28630793edcSSatish Balay 287ab26458aSBarry Smith if (roworiented) { 288ab26458aSBarry Smith stepval = (n-1)*bs; 289ab26458aSBarry Smith } else { 290ab26458aSBarry Smith stepval = (m-1)*bs; 291ab26458aSBarry Smith } 292ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 294a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295a8c6a408SBarry Smith if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296ab26458aSBarry Smith #endif 297ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 298ab26458aSBarry Smith row = im[i] - rstart; 299ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 30015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 30115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 30215b57d14SSatish Balay barray = v + i*bs2; 30315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 30415b57d14SSatish Balay barray = v + j*bs2; 30515b57d14SSatish Balay } else { /* Here a copy is required */ 306ab26458aSBarry Smith if (roworiented) { 307ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 308ab26458aSBarry Smith } else { 309ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 310abef11f7SSatish Balay } 31147513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 31247513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 31330793edcSSatish Balay *barray++ = *value++; 31447513183SBarry Smith } 31547513183SBarry Smith } 31630793edcSSatish Balay barray -=bs2; 31715b57d14SSatish Balay } 318abef11f7SSatish Balay 319abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 320abef11f7SSatish Balay col = in[j] - cstart; 32130793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322ab26458aSBarry Smith } 3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 324a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325a8c6a408SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326ab26458aSBarry Smith #endif 327ab26458aSBarry Smith else { 328ab26458aSBarry Smith if (mat->was_assembled) { 329ab26458aSBarry Smith if (!baij->colmap) { 330ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331ab26458aSBarry Smith } 332a5eb4965SSatish Balay 3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 334a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335a5eb4965SSatish Balay #endif 336a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 337ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339ab26458aSBarry Smith col = in[j]; 340ab26458aSBarry Smith } 341ab26458aSBarry Smith } 342ab26458aSBarry Smith else col = in[j]; 34330793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344ab26458aSBarry Smith } 345ab26458aSBarry Smith } 346d64ed03dSBarry Smith } else { 347ab26458aSBarry Smith if (!baij->donotstash) { 348ab26458aSBarry Smith if (roworiented ) { 349abef11f7SSatish Balay row = im[i]*bs; 350abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 351abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 352abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 353abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355abef11f7SSatish Balay } 356ab26458aSBarry Smith } 357ab26458aSBarry Smith } 358d64ed03dSBarry Smith } else { 359ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 360abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 361abef11f7SSatish Balay col = in[j]*bs; 362abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 363abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365ab26458aSBarry Smith } 366ab26458aSBarry Smith } 367ab26458aSBarry Smith } 368abef11f7SSatish Balay } 369abef11f7SSatish Balay } 370ab26458aSBarry Smith } 371ab26458aSBarry Smith } 3723a40ed3dSBarry Smith PetscFunctionReturn(0); 373ab26458aSBarry Smith } 3740bdbc534SSatish Balay #include <math.h> 3750bdbc534SSatish Balay #define HASH_KEY 0.6180339887 3760bdbc534SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 377ab26458aSBarry Smith 3785615d1e5SSatish Balay #undef __FUNC__ 3790bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 3800bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 3810bdbc534SSatish Balay { 3820bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3830bdbc534SSatish Balay int ierr,i,j,row,col; 3840bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 3850bdbc534SSatish Balay int rend_orig=baij->rend_bs; 3860bdbc534SSatish Balay 3870bdbc534SSatish Balay int h1,key,size=baij->ht_size,k,bs=baij->bs; 388fef45726SSatish Balay int * HT = baij->ht; 389*b9e4cc15SSatish Balay Scalar ** HD = baij->hd,value; 3900bdbc534SSatish Balay 3910bdbc534SSatish Balay 3920bdbc534SSatish Balay PetscFunctionBegin; 3930bdbc534SSatish Balay 3940bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 3950bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 3960bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 3970bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 3980bdbc534SSatish Balay #endif 3990bdbc534SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 4000bdbc534SSatish Balay row = im[i]; 4010bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4020bdbc534SSatish Balay col = in[j]; 4030bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4040bdbc534SSatish Balay /* Look up into the Hash Table */ 4050bdbc534SSatish Balay key = (row/bs)*baij->Nbs+(col/bs)+1; 4060bdbc534SSatish Balay h1 = HASH1(size,key); 4070bdbc534SSatish Balay 4080bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 4090bdbc534SSatish Balay if (HT[(h1+k)%size] == key) { 4100bdbc534SSatish Balay if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value; 4110bdbc534SSatish Balay else *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) = value; 4120bdbc534SSatish Balay break; 4130bdbc534SSatish Balay } 4140bdbc534SSatish Balay } 415a74b47caSSatish Balay if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 4160bdbc534SSatish Balay } 4170bdbc534SSatish Balay } else { 4180bdbc534SSatish Balay if (roworiented && !baij->donotstash) { 4190bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 4200bdbc534SSatish Balay } else { 4210bdbc534SSatish Balay if (!baij->donotstash) { 4220bdbc534SSatish Balay row = im[i]; 4230bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4240bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 4250bdbc534SSatish Balay } 4260bdbc534SSatish Balay } 4270bdbc534SSatish Balay } 4280bdbc534SSatish Balay } 4290bdbc534SSatish Balay } 4300bdbc534SSatish Balay PetscFunctionReturn(0); 4310bdbc534SSatish Balay } 4320bdbc534SSatish Balay 4330bdbc534SSatish Balay #undef __FUNC__ 4340bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4350bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4360bdbc534SSatish Balay { 4370bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4380bdbc534SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 4390bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 440b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 4410bdbc534SSatish Balay 4420bdbc534SSatish Balay int h1,key,size=baij->ht_size; 443fef45726SSatish Balay int * HT = baij->ht; 444*b9e4cc15SSatish Balay Scalar ** HD = baij->hd,*value,*baij_a; 4450bdbc534SSatish Balay 446d0a41580SSatish Balay PetscFunctionBegin; 447d0a41580SSatish Balay 4480bdbc534SSatish Balay if (roworiented) { 4490bdbc534SSatish Balay stepval = (n-1)*bs; 4500bdbc534SSatish Balay } else { 4510bdbc534SSatish Balay stepval = (m-1)*bs; 4520bdbc534SSatish Balay } 4530bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4540bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4550bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4560bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4570bdbc534SSatish Balay #endif 4580bdbc534SSatish Balay if (im[i] >= rstart && im[i] < rend) { 4590bdbc534SSatish Balay row = im[i]; 4600bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4610bdbc534SSatish Balay col = in[j]; 4620bdbc534SSatish Balay 4630bdbc534SSatish Balay /* Look up into the Hash Table */ 4640bdbc534SSatish Balay key = row*baij->Nbs+col+1; 4650bdbc534SSatish Balay h1 = HASH1(size,key); 4660bdbc534SSatish Balay 4670bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 4680bdbc534SSatish Balay if (HT[(h1+k)%size] == key) { 4690bdbc534SSatish Balay baij_a = HD[(h1+k)%size]; 4700bdbc534SSatish Balay 4710bdbc534SSatish Balay if (roworiented) { 4720bdbc534SSatish Balay value = v + i*(stepval+bs)*bs + j*bs; 473fef45726SSatish Balay if (addv == ADD_VALUES) { 474b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 475b4cc0f5aSSatish Balay for ( jj=0; jj<bs2; jj+=bs ) { 476fef45726SSatish Balay baij_a[jj] += *value++; 477b4cc0f5aSSatish Balay } 478b4cc0f5aSSatish Balay } 479fef45726SSatish Balay } else { 480fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 481fef45726SSatish Balay for ( jj=0; jj<bs2; jj+=bs ) { 482fef45726SSatish Balay baij_a[jj] = *value++; 483fef45726SSatish Balay } 484fef45726SSatish Balay } 485fef45726SSatish Balay } 4860bdbc534SSatish Balay } else { 4870bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 488fef45726SSatish Balay if (addv == ADD_VALUES) { 489b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 4900bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 491fef45726SSatish Balay baij_a[jj] += *value++; 492fef45726SSatish Balay } 493fef45726SSatish Balay } 494fef45726SSatish Balay } else { 495fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 496fef45726SSatish Balay for ( jj=0; jj<bs; jj++ ) { 497fef45726SSatish Balay baij_a[jj] = *value++; 498fef45726SSatish Balay } 499b4cc0f5aSSatish Balay } 5000bdbc534SSatish Balay } 5010bdbc534SSatish Balay } 5020bdbc534SSatish Balay break; 5030bdbc534SSatish Balay } 5040bdbc534SSatish Balay } 505fef45726SSatish Balay if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 5060bdbc534SSatish Balay } 5070bdbc534SSatish Balay } else { 5080bdbc534SSatish Balay if (!baij->donotstash) { 5090bdbc534SSatish Balay if (roworiented ) { 5100bdbc534SSatish Balay row = im[i]*bs; 5110bdbc534SSatish Balay value = v + i*(stepval+bs)*bs; 5120bdbc534SSatish Balay for ( j=0; j<bs; j++,row++ ) { 5130bdbc534SSatish Balay for ( k=0; k<n; k++ ) { 5140bdbc534SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 5150bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5160bdbc534SSatish Balay } 5170bdbc534SSatish Balay } 5180bdbc534SSatish Balay } 5190bdbc534SSatish Balay } else { 5200bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5210bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 5220bdbc534SSatish Balay col = in[j]*bs; 5230bdbc534SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 5240bdbc534SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 5250bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5260bdbc534SSatish Balay } 5270bdbc534SSatish Balay } 5280bdbc534SSatish Balay } 5290bdbc534SSatish Balay } 5300bdbc534SSatish Balay } 5310bdbc534SSatish Balay } 5320bdbc534SSatish Balay } 5330bdbc534SSatish Balay PetscFunctionReturn(0); 5340bdbc534SSatish Balay } 5350bdbc534SSatish Balay #undef __FUNC__ 5365615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 537ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 538d6de1c52SSatish Balay { 539d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 540d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 541d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 542d6de1c52SSatish Balay 543d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 544a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 545a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 546d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 547d6de1c52SSatish Balay row = idxm[i] - bsrstart; 548d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 549a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 550a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 551d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 552d6de1c52SSatish Balay col = idxn[j] - bscstart; 553d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 554d64ed03dSBarry Smith } else { 555905e6a2fSBarry Smith if (!baij->colmap) { 556905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 557905e6a2fSBarry Smith } 558e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 559dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 560d9d09a02SSatish Balay else { 561dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 562d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 563d6de1c52SSatish Balay } 564d6de1c52SSatish Balay } 565d6de1c52SSatish Balay } 566d64ed03dSBarry Smith } else { 567a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 568d6de1c52SSatish Balay } 569d6de1c52SSatish Balay } 5703a40ed3dSBarry Smith PetscFunctionReturn(0); 571d6de1c52SSatish Balay } 572d6de1c52SSatish Balay 5735615d1e5SSatish Balay #undef __FUNC__ 5745615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 575ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 576d6de1c52SSatish Balay { 577d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 578d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 579acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 580d6de1c52SSatish Balay double sum = 0.0; 581d6de1c52SSatish Balay Scalar *v; 582d6de1c52SSatish Balay 583d64ed03dSBarry Smith PetscFunctionBegin; 584d6de1c52SSatish Balay if (baij->size == 1) { 585d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 586d6de1c52SSatish Balay } else { 587d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 588d6de1c52SSatish Balay v = amat->a; 589d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 5903a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 591d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 592d6de1c52SSatish Balay #else 593d6de1c52SSatish Balay sum += (*v)*(*v); v++; 594d6de1c52SSatish Balay #endif 595d6de1c52SSatish Balay } 596d6de1c52SSatish Balay v = bmat->a; 597d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 5983a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 599d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 600d6de1c52SSatish Balay #else 601d6de1c52SSatish Balay sum += (*v)*(*v); v++; 602d6de1c52SSatish Balay #endif 603d6de1c52SSatish Balay } 604ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 605d6de1c52SSatish Balay *norm = sqrt(*norm); 606d64ed03dSBarry Smith } else { 607e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 608d6de1c52SSatish Balay } 609d64ed03dSBarry Smith } 6103a40ed3dSBarry Smith PetscFunctionReturn(0); 611d6de1c52SSatish Balay } 61257b952d6SSatish Balay 6135615d1e5SSatish Balay #undef __FUNC__ 6145615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 615ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 61657b952d6SSatish Balay { 61757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 61857b952d6SSatish Balay MPI_Comm comm = mat->comm; 61957b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 62057b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 62157b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 62257b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 62357b952d6SSatish Balay InsertMode addv; 62457b952d6SSatish Balay Scalar *rvalues,*svalues; 62557b952d6SSatish Balay 626d64ed03dSBarry Smith PetscFunctionBegin; 62757b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 628ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 62957b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 630a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 63157b952d6SSatish Balay } 632e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 63357b952d6SSatish Balay 63457b952d6SSatish Balay /* first count number of contributors to each processor */ 63557b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 63657b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 63757b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 63857b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 63957b952d6SSatish Balay idx = baij->stash.idx[i]; 64057b952d6SSatish Balay for ( j=0; j<size; j++ ) { 64157b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 64257b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 64357b952d6SSatish Balay } 64457b952d6SSatish Balay } 64557b952d6SSatish Balay } 64657b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 64757b952d6SSatish Balay 64857b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 64957b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 650ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 65157b952d6SSatish Balay nreceives = work[rank]; 652ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 65357b952d6SSatish Balay nmax = work[rank]; 65457b952d6SSatish Balay PetscFree(work); 65557b952d6SSatish Balay 65657b952d6SSatish Balay /* post receives: 65757b952d6SSatish Balay 1) each message will consist of ordered pairs 65857b952d6SSatish Balay (global index,value) we store the global index as a double 65957b952d6SSatish Balay to simplify the message passing. 66057b952d6SSatish Balay 2) since we don't know how long each individual message is we 66157b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 66257b952d6SSatish Balay this is a lot of wasted space. 66357b952d6SSatish Balay 66457b952d6SSatish Balay 66557b952d6SSatish Balay This could be done better. 66657b952d6SSatish Balay */ 667f8abc2e8SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 668f8abc2e8SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 66957b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 670ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 67157b952d6SSatish Balay } 67257b952d6SSatish Balay 67357b952d6SSatish Balay /* do sends: 67457b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 67557b952d6SSatish Balay the ith processor 67657b952d6SSatish Balay */ 67757b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 678d64ed03dSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 67957b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 68057b952d6SSatish Balay starts[0] = 0; 68157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 68257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 68357b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 68457b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 68557b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 68657b952d6SSatish Balay } 68757b952d6SSatish Balay PetscFree(owner); 68857b952d6SSatish Balay starts[0] = 0; 68957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 69057b952d6SSatish Balay count = 0; 69157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 69257b952d6SSatish Balay if (procs[i]) { 693ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 69457b952d6SSatish Balay } 69557b952d6SSatish Balay } 69657b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 69757b952d6SSatish Balay 69857b952d6SSatish Balay /* Free cache space */ 699cd1fa5fbSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 70057b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 70157b952d6SSatish Balay 70257b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 70357b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 70457b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 70557b952d6SSatish Balay baij->rmax = nmax; 70657b952d6SSatish Balay 7073a40ed3dSBarry Smith PetscFunctionReturn(0); 70857b952d6SSatish Balay } 709bd7f49f5SSatish Balay 710fef45726SSatish Balay /* 711fef45726SSatish Balay Creates the hash table, and sets the table 712fef45726SSatish Balay This table is created only once. 713fef45726SSatish Balay If new entried need to be added to the matrix 714fef45726SSatish Balay then the hash table has to be destroyed and 715fef45726SSatish Balay recreated. 716fef45726SSatish Balay */ 717fef45726SSatish Balay #undef __FUNC__ 718fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 719d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 720596b8d2eSBarry Smith { 721596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 722596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 723596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 7240bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7250bdbc534SSatish Balay int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 7260bdbc534SSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col; 727fef45726SSatish Balay int *HT,key; 7280bdbc534SSatish Balay Scalar **HD; 729fef45726SSatish Balay 730d64ed03dSBarry Smith PetscFunctionBegin; 7310bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 7320bdbc534SSatish Balay size = baij->ht_size; 733fef45726SSatish Balay 7340bdbc534SSatish Balay if (baij->ht) { 7350bdbc534SSatish Balay PetscFunctionReturn(0); 736596b8d2eSBarry Smith } 7370bdbc534SSatish Balay 738fef45726SSatish Balay /* Allocate Memory for Hash Table */ 739*b9e4cc15SSatish Balay baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 740*b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 741*b9e4cc15SSatish Balay HD = baij->hd; 742a07cd24cSSatish Balay HT = baij->ht; 743*b9e4cc15SSatish Balay 744*b9e4cc15SSatish Balay 745fef45726SSatish Balay PetscMemzero(HT,size*sizeof(int)+sizeof(Scalar*)); 7460bdbc534SSatish Balay 747596b8d2eSBarry Smith 748596b8d2eSBarry Smith /* Loop Over A */ 7490bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 750596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 7510bdbc534SSatish Balay row = i+rstart; 7520bdbc534SSatish Balay col = aj[j]+cstart; 753596b8d2eSBarry Smith 7540bdbc534SSatish Balay key = row*baij->Nbs + col + 1; 7550bdbc534SSatish Balay /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 7560bdbc534SSatish Balay h1 = HASH1(size,key); 7570bdbc534SSatish Balay 7580bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7590bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7600bdbc534SSatish Balay HT[(h1+k)%size] = key; 7610bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 762596b8d2eSBarry Smith break; 763596b8d2eSBarry Smith } else ct++; 764596b8d2eSBarry Smith } 765bd7f49f5SSatish Balay if (k> max1) max1 = k; 766596b8d2eSBarry Smith } 767596b8d2eSBarry Smith } 768596b8d2eSBarry Smith /* Loop Over B */ 7690bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 770596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 7710bdbc534SSatish Balay row = i+rstart; 7720bdbc534SSatish Balay col = garray[bj[j]]; 7730bdbc534SSatish Balay key = row*baij->Nbs + col + 1; 7740bdbc534SSatish Balay /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 775596b8d2eSBarry Smith h1 = HASH1(size,key); 7760bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7770bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7780bdbc534SSatish Balay HT[(h1+k)%size] = key; 7790bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 780596b8d2eSBarry Smith break; 781596b8d2eSBarry Smith } else ct++; 782596b8d2eSBarry Smith } 783bd7f49f5SSatish Balay if (k> max2) max2 = k; 784596b8d2eSBarry Smith } 785596b8d2eSBarry Smith } 786596b8d2eSBarry Smith 787596b8d2eSBarry Smith /* Print Summary */ 788596b8d2eSBarry Smith for ( i=0,key=0.0,j=0; i<size; i++) 789596b8d2eSBarry Smith if (HT[i]) {j++;} 790596b8d2eSBarry Smith 7910bdbc534SSatish Balay /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 7920bdbc534SSatish Balay PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 7930bdbc534SSatish Balay fflush(stdout); 7943a40ed3dSBarry Smith PetscFunctionReturn(0); 795596b8d2eSBarry Smith } 79657b952d6SSatish Balay 7975615d1e5SSatish Balay #undef __FUNC__ 7985615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 799ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 80057b952d6SSatish Balay { 80157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 80257b952d6SSatish Balay MPI_Status *send_status,recv_status; 80357b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 804596b8d2eSBarry Smith int bs=baij->bs,row,col,other_disassembled,flg; 80557b952d6SSatish Balay Scalar *values,val; 806e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 80757b952d6SSatish Balay 808d64ed03dSBarry Smith PetscFunctionBegin; 80957b952d6SSatish Balay /* wait on receives */ 81057b952d6SSatish Balay while (count) { 811ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 81257b952d6SSatish Balay /* unpack receives into our local space */ 81357b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 814ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 81557b952d6SSatish Balay n = n/3; 81657b952d6SSatish Balay for ( i=0; i<n; i++ ) { 81757b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 81857b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 81957b952d6SSatish Balay val = values[3*i+2]; 82057b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 82157b952d6SSatish Balay col -= baij->cstart*bs; 8226fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 823d64ed03dSBarry Smith } else { 82457b952d6SSatish Balay if (mat->was_assembled) { 825905e6a2fSBarry Smith if (!baij->colmap) { 826905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 827905e6a2fSBarry Smith } 828a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 82957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 83057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 83157b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 83257b952d6SSatish Balay } 83357b952d6SSatish Balay } 8346fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 83557b952d6SSatish Balay } 83657b952d6SSatish Balay } 83757b952d6SSatish Balay count--; 83857b952d6SSatish Balay } 83957b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 84057b952d6SSatish Balay 84157b952d6SSatish Balay /* wait on sends */ 84257b952d6SSatish Balay if (baij->nsends) { 843d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 844ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 84557b952d6SSatish Balay PetscFree(send_status); 84657b952d6SSatish Balay } 84757b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 84857b952d6SSatish Balay 84957b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 85057b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 85157b952d6SSatish Balay 85257b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 85357b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 854ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 85557b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 85657b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 85757b952d6SSatish Balay } 85857b952d6SSatish Balay 8596d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 86057b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 86157b952d6SSatish Balay } 86257b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 86357b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 86457b952d6SSatish Balay 865596b8d2eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 86644e6884eSSatish Balay if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) { 867a07cd24cSSatish Balay double fact = 1.39; 868fef45726SSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr); 869a74b47caSSatish Balay if (fact <= 1.0) fact = 1.39; 870a74b47caSSatish Balay PLogInfo(0,"[%d]MatAssemblyEnd_MPIBAIJ:Hash table Factor used %5.2f\n",PetscGlobalRank,fact); 871fef45726SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr); 8720bdbc534SSatish Balay mat->ops.setvalues = MatSetValues_MPIBAIJ_HT; 8730bdbc534SSatish Balay mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 874bd7f49f5SSatish Balay } 87557b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 8763a40ed3dSBarry Smith PetscFunctionReturn(0); 87757b952d6SSatish Balay } 87857b952d6SSatish Balay 8795615d1e5SSatish Balay #undef __FUNC__ 8805615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 88157b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 88257b952d6SSatish Balay { 88357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 88457b952d6SSatish Balay int ierr; 88557b952d6SSatish Balay 886d64ed03dSBarry Smith PetscFunctionBegin; 88757b952d6SSatish Balay if (baij->size == 1) { 88857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 889a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 8903a40ed3dSBarry Smith PetscFunctionReturn(0); 89157b952d6SSatish Balay } 89257b952d6SSatish Balay 8935615d1e5SSatish Balay #undef __FUNC__ 8945615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 89557b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 89657b952d6SSatish Balay { 89757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 898cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 89957b952d6SSatish Balay FILE *fd; 90057b952d6SSatish Balay ViewerType vtype; 90157b952d6SSatish Balay 902d64ed03dSBarry Smith PetscFunctionBegin; 90357b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 90457b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 90557b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 906639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 9074e220ebcSLois Curfman McInnes MatInfo info; 90857b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 90957b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 9104e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 91157b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 91257b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 9134e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 9144e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 9154e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 9164e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 9174e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 9184e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 91957b952d6SSatish Balay fflush(fd); 92057b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 92157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 9223a40ed3dSBarry Smith PetscFunctionReturn(0); 923d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 924bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 9253a40ed3dSBarry Smith PetscFunctionReturn(0); 92657b952d6SSatish Balay } 92757b952d6SSatish Balay } 92857b952d6SSatish Balay 92957b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 93057b952d6SSatish Balay Draw draw; 93157b952d6SSatish Balay PetscTruth isnull; 93257b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 9333a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 93457b952d6SSatish Balay } 93557b952d6SSatish Balay 93657b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 93757b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 93857b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 93957b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 94057b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 94157b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 94257b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 94357b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 94457b952d6SSatish Balay fflush(fd); 94557b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 946d64ed03dSBarry Smith } else { 94757b952d6SSatish Balay int size = baij->size; 94857b952d6SSatish Balay rank = baij->rank; 94957b952d6SSatish Balay if (size == 1) { 95057b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 951d64ed03dSBarry Smith } else { 95257b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 95357b952d6SSatish Balay Mat A; 95457b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 95557b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 95657b952d6SSatish Balay int mbs=baij->mbs; 95757b952d6SSatish Balay Scalar *a; 95857b952d6SSatish Balay 95957b952d6SSatish Balay if (!rank) { 96055843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 961d64ed03dSBarry Smith } else { 96255843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 96357b952d6SSatish Balay } 96457b952d6SSatish Balay PLogObjectParent(mat,A); 96557b952d6SSatish Balay 96657b952d6SSatish Balay /* copy over the A part */ 96757b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 96857b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 96957b952d6SSatish Balay row = baij->rstart; 97057b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 97157b952d6SSatish Balay 97257b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 97357b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 97457b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 97557b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 97657b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 97757b952d6SSatish Balay for (k=0; k<bs; k++ ) { 978cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 979cee3aa6bSSatish Balay col++; a += bs; 98057b952d6SSatish Balay } 98157b952d6SSatish Balay } 98257b952d6SSatish Balay } 98357b952d6SSatish Balay /* copy over the B part */ 98457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 98557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 98657b952d6SSatish Balay row = baij->rstart*bs; 98757b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 98857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 98957b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 99057b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 99157b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 99257b952d6SSatish Balay for (k=0; k<bs; k++ ) { 993cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 994cee3aa6bSSatish Balay col++; a += bs; 99557b952d6SSatish Balay } 99657b952d6SSatish Balay } 99757b952d6SSatish Balay } 99857b952d6SSatish Balay PetscFree(rvals); 9996d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10006d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 100155843e3eSBarry Smith /* 100255843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 100355843e3eSBarry Smith synchronized across all processors that share the Draw object 100455843e3eSBarry Smith */ 100555843e3eSBarry Smith if (!rank || vtype == DRAW_VIEWER) { 100657b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 100757b952d6SSatish Balay } 100857b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 100957b952d6SSatish Balay } 101057b952d6SSatish Balay } 10113a40ed3dSBarry Smith PetscFunctionReturn(0); 101257b952d6SSatish Balay } 101357b952d6SSatish Balay 101457b952d6SSatish Balay 101557b952d6SSatish Balay 10165615d1e5SSatish Balay #undef __FUNC__ 10175615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 1018ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 101957b952d6SSatish Balay { 102057b952d6SSatish Balay Mat mat = (Mat) obj; 102157b952d6SSatish Balay int ierr; 102257b952d6SSatish Balay ViewerType vtype; 102357b952d6SSatish Balay 1024d64ed03dSBarry Smith PetscFunctionBegin; 102557b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 102657b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 102757b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 102857b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 10293a40ed3dSBarry Smith } else if (vtype == BINARY_FILE_VIEWER) { 10303a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 103157b952d6SSatish Balay } 10323a40ed3dSBarry Smith PetscFunctionReturn(0); 103357b952d6SSatish Balay } 103457b952d6SSatish Balay 10355615d1e5SSatish Balay #undef __FUNC__ 10365615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1037ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 103879bdfe76SSatish Balay { 103979bdfe76SSatish Balay Mat mat = (Mat) obj; 104079bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 104179bdfe76SSatish Balay int ierr; 104279bdfe76SSatish Balay 1043d64ed03dSBarry Smith PetscFunctionBegin; 10443a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 104579bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 104679bdfe76SSatish Balay #endif 104779bdfe76SSatish Balay 104883e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 104979bdfe76SSatish Balay PetscFree(baij->rowners); 105079bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 105179bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 105279bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 105379bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 105479bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 105579bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 105679bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 105730793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 1058*b9e4cc15SSatish Balay if (baij->hd) PetscFree(baij->hd); 105979bdfe76SSatish Balay PetscFree(baij); 106079bdfe76SSatish Balay PLogObjectDestroy(mat); 106179bdfe76SSatish Balay PetscHeaderDestroy(mat); 10623a40ed3dSBarry Smith PetscFunctionReturn(0); 106379bdfe76SSatish Balay } 106479bdfe76SSatish Balay 10655615d1e5SSatish Balay #undef __FUNC__ 10665615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1067ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1068cee3aa6bSSatish Balay { 1069cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 107047b4a8eaSLois Curfman McInnes int ierr, nt; 1071cee3aa6bSSatish Balay 1072d64ed03dSBarry Smith PetscFunctionBegin; 1073c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 107447b4a8eaSLois Curfman McInnes if (nt != a->n) { 1075a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 107647b4a8eaSLois Curfman McInnes } 1077c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 107847b4a8eaSLois Curfman McInnes if (nt != a->m) { 1079a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 108047b4a8eaSLois Curfman McInnes } 108143a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1082cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 108343a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1084cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 108543a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 10863a40ed3dSBarry Smith PetscFunctionReturn(0); 1087cee3aa6bSSatish Balay } 1088cee3aa6bSSatish Balay 10895615d1e5SSatish Balay #undef __FUNC__ 10905615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1091ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1092cee3aa6bSSatish Balay { 1093cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1094cee3aa6bSSatish Balay int ierr; 1095d64ed03dSBarry Smith 1096d64ed03dSBarry Smith PetscFunctionBegin; 109743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1098cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 109943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1100cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 11013a40ed3dSBarry Smith PetscFunctionReturn(0); 1102cee3aa6bSSatish Balay } 1103cee3aa6bSSatish Balay 11045615d1e5SSatish Balay #undef __FUNC__ 11055615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1106ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1107cee3aa6bSSatish Balay { 1108cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1109cee3aa6bSSatish Balay int ierr; 1110cee3aa6bSSatish Balay 1111d64ed03dSBarry Smith PetscFunctionBegin; 1112cee3aa6bSSatish Balay /* do nondiagonal part */ 1113cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1114cee3aa6bSSatish Balay /* send it on its way */ 1115537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1116cee3aa6bSSatish Balay /* do local part */ 1117cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1118cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1119cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1120cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1121639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 11223a40ed3dSBarry Smith PetscFunctionReturn(0); 1123cee3aa6bSSatish Balay } 1124cee3aa6bSSatish Balay 11255615d1e5SSatish Balay #undef __FUNC__ 11265615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1127ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1128cee3aa6bSSatish Balay { 1129cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1130cee3aa6bSSatish Balay int ierr; 1131cee3aa6bSSatish Balay 1132d64ed03dSBarry Smith PetscFunctionBegin; 1133cee3aa6bSSatish Balay /* do nondiagonal part */ 1134cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1135cee3aa6bSSatish Balay /* send it on its way */ 1136537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1137cee3aa6bSSatish Balay /* do local part */ 1138cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1139cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1140cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1141cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1142537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 11433a40ed3dSBarry Smith PetscFunctionReturn(0); 1144cee3aa6bSSatish Balay } 1145cee3aa6bSSatish Balay 1146cee3aa6bSSatish Balay /* 1147cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1148cee3aa6bSSatish Balay diagonal block 1149cee3aa6bSSatish Balay */ 11505615d1e5SSatish Balay #undef __FUNC__ 11515615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1152ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1153cee3aa6bSSatish Balay { 1154cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 11553a40ed3dSBarry Smith int ierr; 1156d64ed03dSBarry Smith 1157d64ed03dSBarry Smith PetscFunctionBegin; 1158a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 11593a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 11603a40ed3dSBarry Smith PetscFunctionReturn(0); 1161cee3aa6bSSatish Balay } 1162cee3aa6bSSatish Balay 11635615d1e5SSatish Balay #undef __FUNC__ 11645615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1165ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1166cee3aa6bSSatish Balay { 1167cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1168cee3aa6bSSatish Balay int ierr; 1169d64ed03dSBarry Smith 1170d64ed03dSBarry Smith PetscFunctionBegin; 1171cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1172cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 11733a40ed3dSBarry Smith PetscFunctionReturn(0); 1174cee3aa6bSSatish Balay } 1175026e39d0SSatish Balay 11765615d1e5SSatish Balay #undef __FUNC__ 11775615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1178ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 117957b952d6SSatish Balay { 118057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1181d64ed03dSBarry Smith 1182d64ed03dSBarry Smith PetscFunctionBegin; 1183bd7f49f5SSatish Balay if (m) *m = mat->M; 1184bd7f49f5SSatish Balay if (n) *n = mat->N; 11853a40ed3dSBarry Smith PetscFunctionReturn(0); 118657b952d6SSatish Balay } 118757b952d6SSatish Balay 11885615d1e5SSatish Balay #undef __FUNC__ 11895615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1190ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 119157b952d6SSatish Balay { 119257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1193d64ed03dSBarry Smith 1194d64ed03dSBarry Smith PetscFunctionBegin; 119557b952d6SSatish Balay *m = mat->m; *n = mat->N; 11963a40ed3dSBarry Smith PetscFunctionReturn(0); 119757b952d6SSatish Balay } 119857b952d6SSatish Balay 11995615d1e5SSatish Balay #undef __FUNC__ 12005615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1201ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 120257b952d6SSatish Balay { 120357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1204d64ed03dSBarry Smith 1205d64ed03dSBarry Smith PetscFunctionBegin; 120657b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 12073a40ed3dSBarry Smith PetscFunctionReturn(0); 120857b952d6SSatish Balay } 120957b952d6SSatish Balay 1210acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1211acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1212acdf5bf4SSatish Balay 12135615d1e5SSatish Balay #undef __FUNC__ 12145615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1215acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1216acdf5bf4SSatish Balay { 1217acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1218acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1219acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1220d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1221d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1222acdf5bf4SSatish Balay 1223d64ed03dSBarry Smith PetscFunctionBegin; 1224a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1225acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1226acdf5bf4SSatish Balay 1227acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1228acdf5bf4SSatish Balay /* 1229acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1230acdf5bf4SSatish Balay */ 1231acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1232bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1233bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1234acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1235acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1236acdf5bf4SSatish Balay } 1237acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1238acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1239acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1240acdf5bf4SSatish Balay } 1241acdf5bf4SSatish Balay 1242a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1243d9d09a02SSatish Balay lrow = row - brstart; 1244acdf5bf4SSatish Balay 1245acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1246acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1247acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1248acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1249acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1250acdf5bf4SSatish Balay nztot = nzA + nzB; 1251acdf5bf4SSatish Balay 1252acdf5bf4SSatish Balay cmap = mat->garray; 1253acdf5bf4SSatish Balay if (v || idx) { 1254acdf5bf4SSatish Balay if (nztot) { 1255acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1256acdf5bf4SSatish Balay int imark = -1; 1257acdf5bf4SSatish Balay if (v) { 1258acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1259acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1260d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1261acdf5bf4SSatish Balay else break; 1262acdf5bf4SSatish Balay } 1263acdf5bf4SSatish Balay imark = i; 1264acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1265acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1266acdf5bf4SSatish Balay } 1267acdf5bf4SSatish Balay if (idx) { 1268acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1269acdf5bf4SSatish Balay if (imark > -1) { 1270acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1271bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1272acdf5bf4SSatish Balay } 1273acdf5bf4SSatish Balay } else { 1274acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1275d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1276d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1277acdf5bf4SSatish Balay else break; 1278acdf5bf4SSatish Balay } 1279acdf5bf4SSatish Balay imark = i; 1280acdf5bf4SSatish Balay } 1281d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1282d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1283acdf5bf4SSatish Balay } 1284d64ed03dSBarry Smith } else { 1285d212a18eSSatish Balay if (idx) *idx = 0; 1286d212a18eSSatish Balay if (v) *v = 0; 1287d212a18eSSatish Balay } 1288acdf5bf4SSatish Balay } 1289acdf5bf4SSatish Balay *nz = nztot; 1290acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1291acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 12923a40ed3dSBarry Smith PetscFunctionReturn(0); 1293acdf5bf4SSatish Balay } 1294acdf5bf4SSatish Balay 12955615d1e5SSatish Balay #undef __FUNC__ 12965615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1297acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1298acdf5bf4SSatish Balay { 1299acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1300d64ed03dSBarry Smith 1301d64ed03dSBarry Smith PetscFunctionBegin; 1302acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1303a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1304acdf5bf4SSatish Balay } 1305acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 13063a40ed3dSBarry Smith PetscFunctionReturn(0); 1307acdf5bf4SSatish Balay } 1308acdf5bf4SSatish Balay 13095615d1e5SSatish Balay #undef __FUNC__ 13105615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1311ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 13125a838052SSatish Balay { 13135a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1314d64ed03dSBarry Smith 1315d64ed03dSBarry Smith PetscFunctionBegin; 13165a838052SSatish Balay *bs = baij->bs; 13173a40ed3dSBarry Smith PetscFunctionReturn(0); 13185a838052SSatish Balay } 13195a838052SSatish Balay 13205615d1e5SSatish Balay #undef __FUNC__ 13215615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1322ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 132358667388SSatish Balay { 132458667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 132558667388SSatish Balay int ierr; 1326d64ed03dSBarry Smith 1327d64ed03dSBarry Smith PetscFunctionBegin; 132858667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 132958667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 13303a40ed3dSBarry Smith PetscFunctionReturn(0); 133158667388SSatish Balay } 13320ac07820SSatish Balay 13335615d1e5SSatish Balay #undef __FUNC__ 13345615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1335ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 13360ac07820SSatish Balay { 13374e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 13384e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 13397d57db60SLois Curfman McInnes int ierr; 13407d57db60SLois Curfman McInnes double isend[5], irecv[5]; 13410ac07820SSatish Balay 1342d64ed03dSBarry Smith PetscFunctionBegin; 13434e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 13444e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 13454e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 13464e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 13474e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 13480ac07820SSatish Balay if (flag == MAT_LOCAL) { 13494e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 13504e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 13514e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 13524e220ebcSLois Curfman McInnes info->memory = isend[3]; 13534e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 13540ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1355ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 13564e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13574e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13584e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13594e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13604e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13610ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1362ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 13634e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13644e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13654e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13664e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13674e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13680ac07820SSatish Balay } 13695a5d4f66SBarry Smith info->rows_global = (double)a->M; 13705a5d4f66SBarry Smith info->columns_global = (double)a->N; 13715a5d4f66SBarry Smith info->rows_local = (double)a->m; 13725a5d4f66SBarry Smith info->columns_local = (double)a->N; 13734e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 13744e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 13754e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 13763a40ed3dSBarry Smith PetscFunctionReturn(0); 13770ac07820SSatish Balay } 13780ac07820SSatish Balay 13795615d1e5SSatish Balay #undef __FUNC__ 13805615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1381ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 138258667388SSatish Balay { 138358667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 138458667388SSatish Balay 1385d64ed03dSBarry Smith PetscFunctionBegin; 138658667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 138758667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 13886da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1389c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 139096854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1391c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1392b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1393b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1394b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1395aeafbbfcSLois Curfman McInnes a->roworiented = 1; 139658667388SSatish Balay MatSetOption(a->A,op); 139758667388SSatish Balay MatSetOption(a->B,op); 1398b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 13996da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 140058667388SSatish Balay op == MAT_SYMMETRIC || 140158667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 140258667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 140358667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 140458667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 140558667388SSatish Balay a->roworiented = 0; 140658667388SSatish Balay MatSetOption(a->A,op); 140758667388SSatish Balay MatSetOption(a->B,op); 14082b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 140990f02eecSBarry Smith a->donotstash = 1; 1410d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1411d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1412d64ed03dSBarry Smith } else { 1413d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1414d64ed03dSBarry Smith } 14153a40ed3dSBarry Smith PetscFunctionReturn(0); 141658667388SSatish Balay } 141758667388SSatish Balay 14185615d1e5SSatish Balay #undef __FUNC__ 14195615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1420ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 14210ac07820SSatish Balay { 14220ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 14230ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 14240ac07820SSatish Balay Mat B; 14250ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 14260ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 14270ac07820SSatish Balay Scalar *a; 14280ac07820SSatish Balay 1429d64ed03dSBarry Smith PetscFunctionBegin; 1430a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 14310ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 14320ac07820SSatish Balay CHKERRQ(ierr); 14330ac07820SSatish Balay 14340ac07820SSatish Balay /* copy over the A part */ 14350ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 14360ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14370ac07820SSatish Balay row = baij->rstart; 14380ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 14390ac07820SSatish Balay 14400ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14410ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14420ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14430ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14440ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 14450ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14460ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14470ac07820SSatish Balay col++; a += bs; 14480ac07820SSatish Balay } 14490ac07820SSatish Balay } 14500ac07820SSatish Balay } 14510ac07820SSatish Balay /* copy over the B part */ 14520ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 14530ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14540ac07820SSatish Balay row = baij->rstart*bs; 14550ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14560ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14570ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14580ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14590ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 14600ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14610ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14620ac07820SSatish Balay col++; a += bs; 14630ac07820SSatish Balay } 14640ac07820SSatish Balay } 14650ac07820SSatish Balay } 14660ac07820SSatish Balay PetscFree(rvals); 14670ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14680ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14690ac07820SSatish Balay 14700ac07820SSatish Balay if (matout != PETSC_NULL) { 14710ac07820SSatish Balay *matout = B; 14720ac07820SSatish Balay } else { 14730ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 14740ac07820SSatish Balay PetscFree(baij->rowners); 14750ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 14760ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 14770ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 14780ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 14790ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 14800ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 14810ac07820SSatish Balay PetscFree(baij); 1482f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 14830ac07820SSatish Balay PetscHeaderDestroy(B); 14840ac07820SSatish Balay } 14853a40ed3dSBarry Smith PetscFunctionReturn(0); 14860ac07820SSatish Balay } 14870e95ebc0SSatish Balay 14885615d1e5SSatish Balay #undef __FUNC__ 14895615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 14900e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 14910e95ebc0SSatish Balay { 14920e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 14930e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 14940e95ebc0SSatish Balay int ierr,s1,s2,s3; 14950e95ebc0SSatish Balay 1496d64ed03dSBarry Smith PetscFunctionBegin; 14970e95ebc0SSatish Balay if (ll) { 14980e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 14990e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1500a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 15010e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 15020e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 15030e95ebc0SSatish Balay } 1504a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 15053a40ed3dSBarry Smith PetscFunctionReturn(0); 15060e95ebc0SSatish Balay } 15070e95ebc0SSatish Balay 15085615d1e5SSatish Balay #undef __FUNC__ 15095615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1510ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 15110ac07820SSatish Balay { 15120ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 15130ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1514a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 15150ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 15160ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1517a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 15180ac07820SSatish Balay MPI_Comm comm = A->comm; 15190ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 15200ac07820SSatish Balay MPI_Status recv_status,*send_status; 15210ac07820SSatish Balay IS istmp; 15220ac07820SSatish Balay 1523d64ed03dSBarry Smith PetscFunctionBegin; 15240ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 15250ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 15260ac07820SSatish Balay 15270ac07820SSatish Balay /* first count number of contributors to each processor */ 15280ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 15290ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 15300ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 15310ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15320ac07820SSatish Balay idx = rows[i]; 15330ac07820SSatish Balay found = 0; 15340ac07820SSatish Balay for ( j=0; j<size; j++ ) { 15350ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 15360ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 15370ac07820SSatish Balay } 15380ac07820SSatish Balay } 1539a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 15400ac07820SSatish Balay } 15410ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 15420ac07820SSatish Balay 15430ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 15440ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1545ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 15460ac07820SSatish Balay nrecvs = work[rank]; 1547ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 15480ac07820SSatish Balay nmax = work[rank]; 15490ac07820SSatish Balay PetscFree(work); 15500ac07820SSatish Balay 15510ac07820SSatish Balay /* post receives: */ 1552d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1553d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 15540ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1555ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 15560ac07820SSatish Balay } 15570ac07820SSatish Balay 15580ac07820SSatish Balay /* do sends: 15590ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 15600ac07820SSatish Balay the ith processor 15610ac07820SSatish Balay */ 15620ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1563ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 15640ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 15650ac07820SSatish Balay starts[0] = 0; 15660ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15670ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15680ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 15690ac07820SSatish Balay } 15700ac07820SSatish Balay ISRestoreIndices(is,&rows); 15710ac07820SSatish Balay 15720ac07820SSatish Balay starts[0] = 0; 15730ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15740ac07820SSatish Balay count = 0; 15750ac07820SSatish Balay for ( i=0; i<size; i++ ) { 15760ac07820SSatish Balay if (procs[i]) { 1577ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 15780ac07820SSatish Balay } 15790ac07820SSatish Balay } 15800ac07820SSatish Balay PetscFree(starts); 15810ac07820SSatish Balay 15820ac07820SSatish Balay base = owners[rank]*bs; 15830ac07820SSatish Balay 15840ac07820SSatish Balay /* wait on receives */ 15850ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 15860ac07820SSatish Balay source = lens + nrecvs; 15870ac07820SSatish Balay count = nrecvs; slen = 0; 15880ac07820SSatish Balay while (count) { 1589ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 15900ac07820SSatish Balay /* unpack receives into our local space */ 1591ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 15920ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 15930ac07820SSatish Balay lens[imdex] = n; 15940ac07820SSatish Balay slen += n; 15950ac07820SSatish Balay count--; 15960ac07820SSatish Balay } 15970ac07820SSatish Balay PetscFree(recv_waits); 15980ac07820SSatish Balay 15990ac07820SSatish Balay /* move the data into the send scatter */ 16000ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 16010ac07820SSatish Balay count = 0; 16020ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 16030ac07820SSatish Balay values = rvalues + i*nmax; 16040ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 16050ac07820SSatish Balay lrows[count++] = values[j] - base; 16060ac07820SSatish Balay } 16070ac07820SSatish Balay } 16080ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 16090ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 16100ac07820SSatish Balay 16110ac07820SSatish Balay /* actually zap the local rows */ 1612029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 16130ac07820SSatish Balay PLogObjectParent(A,istmp); 1614a07cd24cSSatish Balay 1615a07cd24cSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 16160ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 16170ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 16180ac07820SSatish Balay 1619a07cd24cSSatish Balay if (diag) { 1620a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1621a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1622a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1623a07cd24cSSatish Balay } 1624a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1625a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1626a07cd24cSSatish Balay } 1627a07cd24cSSatish Balay PetscFree(lrows); 1628a07cd24cSSatish Balay 16290ac07820SSatish Balay /* wait on sends */ 16300ac07820SSatish Balay if (nsends) { 1631d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1632ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 16330ac07820SSatish Balay PetscFree(send_status); 16340ac07820SSatish Balay } 16350ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 16360ac07820SSatish Balay 16373a40ed3dSBarry Smith PetscFunctionReturn(0); 16380ac07820SSatish Balay } 1639ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 16405615d1e5SSatish Balay #undef __FUNC__ 16415615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1642ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1643ba4ca20aSSatish Balay { 1644ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 16453a40ed3dSBarry Smith int ierr; 1646ba4ca20aSSatish Balay 1647d64ed03dSBarry Smith PetscFunctionBegin; 16483a40ed3dSBarry Smith if (!a->rank) { 16493a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 16503a40ed3dSBarry Smith } 16513a40ed3dSBarry Smith PetscFunctionReturn(0); 1652ba4ca20aSSatish Balay } 16530ac07820SSatish Balay 16545615d1e5SSatish Balay #undef __FUNC__ 16555615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1656ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1657bb5a7306SBarry Smith { 1658bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1659bb5a7306SBarry Smith int ierr; 1660d64ed03dSBarry Smith 1661d64ed03dSBarry Smith PetscFunctionBegin; 1662bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 16633a40ed3dSBarry Smith PetscFunctionReturn(0); 1664bb5a7306SBarry Smith } 1665bb5a7306SBarry Smith 16660ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 16670ac07820SSatish Balay 166879bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 166979bdfe76SSatish Balay static struct _MatOps MatOps = { 1670bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 16714c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 16724c50302cSBarry Smith 0,0,0,0, 16730ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 16740e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 167558667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 16764c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 16774c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 16784c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 167994a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1680d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1681ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1682bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1683ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 168479bdfe76SSatish Balay 168579bdfe76SSatish Balay 16865615d1e5SSatish Balay #undef __FUNC__ 16875615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 168879bdfe76SSatish Balay /*@C 168979bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 169079bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 169179bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 169279bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 169379bdfe76SSatish Balay performance can be increased by more than a factor of 50. 169479bdfe76SSatish Balay 169579bdfe76SSatish Balay Input Parameters: 169679bdfe76SSatish Balay . comm - MPI communicator 169779bdfe76SSatish Balay . bs - size of blockk 169879bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 169992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 170092e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 170192e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 170292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 170392e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 170479bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 170592e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 170679bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 170779bdfe76SSatish Balay submatrix (same for all local rows) 170892e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 170992e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 171092e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 171192e8d321SLois Curfman McInnes it is zero. 171292e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 171379bdfe76SSatish Balay submatrix (same for all local rows). 171492e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 171592e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 171692e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 171779bdfe76SSatish Balay 171879bdfe76SSatish Balay Output Parameter: 171979bdfe76SSatish Balay . A - the matrix 172079bdfe76SSatish Balay 172179bdfe76SSatish Balay Notes: 172279bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 172379bdfe76SSatish Balay (possibly both). 172479bdfe76SSatish Balay 172579bdfe76SSatish Balay Storage Information: 172679bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 172779bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 172879bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 172979bdfe76SSatish Balay local matrix (a rectangular submatrix). 173079bdfe76SSatish Balay 173179bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 173279bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 173379bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 173479bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 173579bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 173679bdfe76SSatish Balay 173779bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 173879bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 173979bdfe76SSatish Balay 174079bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 174179bdfe76SSatish Balay $ ------------------- 174279bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 174379bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 174479bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 174579bdfe76SSatish Balay $ ------------------- 174679bdfe76SSatish Balay $ 174779bdfe76SSatish Balay 174879bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 174979bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 175079bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 175157b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 175279bdfe76SSatish Balay 1753d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1754d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 175579bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 175692e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 175792e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 17586da5968aSLois Curfman McInnes matrices. 175979bdfe76SSatish Balay 176092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 176179bdfe76SSatish Balay 176279bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 176379bdfe76SSatish Balay @*/ 176479bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 176579bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 176679bdfe76SSatish Balay { 176779bdfe76SSatish Balay Mat B; 176879bdfe76SSatish Balay Mat_MPIBAIJ *b; 17694c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 177079bdfe76SSatish Balay 1771d64ed03dSBarry Smith PetscFunctionBegin; 1772a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 17733914022bSBarry Smith 17743914022bSBarry Smith MPI_Comm_size(comm,&size); 17753914022bSBarry Smith if (size == 1) { 17763914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 17773914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 17783914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 17793a40ed3dSBarry Smith PetscFunctionReturn(0); 17803914022bSBarry Smith } 17813914022bSBarry Smith 178279bdfe76SSatish Balay *A = 0; 1783d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 178479bdfe76SSatish Balay PLogObjectCreate(B); 178579bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 178679bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 178779bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 17884c50302cSBarry Smith 178979bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 179079bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 179190f02eecSBarry Smith B->mapping = 0; 179279bdfe76SSatish Balay B->factor = 0; 179379bdfe76SSatish Balay B->assembled = PETSC_FALSE; 179479bdfe76SSatish Balay 1795e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 179679bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 179779bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 179879bdfe76SSatish Balay 1799d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1800a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1801d64ed03dSBarry Smith } 1802a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1803a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1804a8c6a408SBarry Smith } 1805a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1806a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1807a8c6a408SBarry Smith } 1808cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1809cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 181079bdfe76SSatish Balay 181179bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 181279bdfe76SSatish Balay work[0] = m; work[1] = n; 181379bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 1814ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 181579bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 181679bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 181779bdfe76SSatish Balay } 181879bdfe76SSatish Balay if (m == PETSC_DECIDE) { 181979bdfe76SSatish Balay Mbs = M/bs; 1820a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 182179bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 182279bdfe76SSatish Balay m = mbs*bs; 182379bdfe76SSatish Balay } 182479bdfe76SSatish Balay if (n == PETSC_DECIDE) { 182579bdfe76SSatish Balay Nbs = N/bs; 1826a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 182779bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 182879bdfe76SSatish Balay n = nbs*bs; 182979bdfe76SSatish Balay } 1830a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 1831a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1832a8c6a408SBarry Smith } 183379bdfe76SSatish Balay 183479bdfe76SSatish Balay b->m = m; B->m = m; 183579bdfe76SSatish Balay b->n = n; B->n = n; 183679bdfe76SSatish Balay b->N = N; B->N = N; 183779bdfe76SSatish Balay b->M = M; B->M = M; 183879bdfe76SSatish Balay b->bs = bs; 183979bdfe76SSatish Balay b->bs2 = bs*bs; 184079bdfe76SSatish Balay b->mbs = mbs; 184179bdfe76SSatish Balay b->nbs = nbs; 184279bdfe76SSatish Balay b->Mbs = Mbs; 184379bdfe76SSatish Balay b->Nbs = Nbs; 184479bdfe76SSatish Balay 184579bdfe76SSatish Balay /* build local table of row and column ownerships */ 184679bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1847f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 18480ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 1849ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 185079bdfe76SSatish Balay b->rowners[0] = 0; 185179bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 185279bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 185379bdfe76SSatish Balay } 185479bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 185579bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 18564fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 18574fa0d573SSatish Balay b->rend_bs = b->rend * bs; 18584fa0d573SSatish Balay 1859ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 186079bdfe76SSatish Balay b->cowners[0] = 0; 186179bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 186279bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 186379bdfe76SSatish Balay } 186479bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 186579bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 18664fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 18674fa0d573SSatish Balay b->cend_bs = b->cend * bs; 186879bdfe76SSatish Balay 1869a07cd24cSSatish Balay 187079bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1871029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 187279bdfe76SSatish Balay PLogObjectParent(B,b->A); 187379bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1874029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 187579bdfe76SSatish Balay PLogObjectParent(B,b->B); 187679bdfe76SSatish Balay 187779bdfe76SSatish Balay /* build cache for off array entries formed */ 187879bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 187990f02eecSBarry Smith b->donotstash = 0; 188079bdfe76SSatish Balay b->colmap = 0; 188179bdfe76SSatish Balay b->garray = 0; 188279bdfe76SSatish Balay b->roworiented = 1; 188379bdfe76SSatish Balay 188430793edcSSatish Balay /* stuff used in block assembly */ 188530793edcSSatish Balay b->barray = 0; 188630793edcSSatish Balay 188779bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 188879bdfe76SSatish Balay b->lvec = 0; 188979bdfe76SSatish Balay b->Mvctx = 0; 189079bdfe76SSatish Balay 189179bdfe76SSatish Balay /* stuff for MatGetRow() */ 189279bdfe76SSatish Balay b->rowindices = 0; 189379bdfe76SSatish Balay b->rowvalues = 0; 189479bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 189579bdfe76SSatish Balay 1896a07cd24cSSatish Balay /* hash table stuff */ 1897a07cd24cSSatish Balay b->ht = 0; 18980bdbc534SSatish Balay b->ht_size = 0; 1899a07cd24cSSatish Balay 190079bdfe76SSatish Balay *A = B; 19013a40ed3dSBarry Smith PetscFunctionReturn(0); 190279bdfe76SSatish Balay } 1903026e39d0SSatish Balay 19045615d1e5SSatish Balay #undef __FUNC__ 19055615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 19060ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 19070ac07820SSatish Balay { 19080ac07820SSatish Balay Mat mat; 19090ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 19100ac07820SSatish Balay int ierr, len=0, flg; 19110ac07820SSatish Balay 1912d64ed03dSBarry Smith PetscFunctionBegin; 19130ac07820SSatish Balay *newmat = 0; 1914d4bb536fSBarry Smith PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 19150ac07820SSatish Balay PLogObjectCreate(mat); 19160ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 19170ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 19180ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 19190ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 19200ac07820SSatish Balay mat->factor = matin->factor; 19210ac07820SSatish Balay mat->assembled = PETSC_TRUE; 19220ac07820SSatish Balay 19230ac07820SSatish Balay a->m = mat->m = oldmat->m; 19240ac07820SSatish Balay a->n = mat->n = oldmat->n; 19250ac07820SSatish Balay a->M = mat->M = oldmat->M; 19260ac07820SSatish Balay a->N = mat->N = oldmat->N; 19270ac07820SSatish Balay 19280ac07820SSatish Balay a->bs = oldmat->bs; 19290ac07820SSatish Balay a->bs2 = oldmat->bs2; 19300ac07820SSatish Balay a->mbs = oldmat->mbs; 19310ac07820SSatish Balay a->nbs = oldmat->nbs; 19320ac07820SSatish Balay a->Mbs = oldmat->Mbs; 19330ac07820SSatish Balay a->Nbs = oldmat->Nbs; 19340ac07820SSatish Balay 19350ac07820SSatish Balay a->rstart = oldmat->rstart; 19360ac07820SSatish Balay a->rend = oldmat->rend; 19370ac07820SSatish Balay a->cstart = oldmat->cstart; 19380ac07820SSatish Balay a->cend = oldmat->cend; 19390ac07820SSatish Balay a->size = oldmat->size; 19400ac07820SSatish Balay a->rank = oldmat->rank; 1941e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 19420ac07820SSatish Balay a->rowvalues = 0; 19430ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 194430793edcSSatish Balay a->barray = 0; 19450ac07820SSatish Balay 19460ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1947f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 19480ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 19490ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 19500ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 19510ac07820SSatish Balay if (oldmat->colmap) { 19520ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 19530ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 19540ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 19550ac07820SSatish Balay } else a->colmap = 0; 19560ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 19570ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 19580ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 19590ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 19600ac07820SSatish Balay } else a->garray = 0; 19610ac07820SSatish Balay 19620ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 19630ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 19640ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 19650ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 19660ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 19670ac07820SSatish Balay PLogObjectParent(mat,a->A); 19680ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 19690ac07820SSatish Balay PLogObjectParent(mat,a->B); 19700ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 19710ac07820SSatish Balay if (flg) { 19720ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 19730ac07820SSatish Balay } 19740ac07820SSatish Balay *newmat = mat; 19753a40ed3dSBarry Smith PetscFunctionReturn(0); 19760ac07820SSatish Balay } 197757b952d6SSatish Balay 197857b952d6SSatish Balay #include "sys.h" 197957b952d6SSatish Balay 19805615d1e5SSatish Balay #undef __FUNC__ 19815615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 198257b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 198357b952d6SSatish Balay { 198457b952d6SSatish Balay Mat A; 198557b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 198657b952d6SSatish Balay Scalar *vals,*buf; 198757b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 198857b952d6SSatish Balay MPI_Status status; 1989cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 199057b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 199157b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 199257b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 199357b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 199457b952d6SSatish Balay 1995d64ed03dSBarry Smith PetscFunctionBegin; 199657b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 199757b952d6SSatish Balay bs2 = bs*bs; 199857b952d6SSatish Balay 199957b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 200057b952d6SSatish Balay if (!rank) { 200157b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2002e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2003a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2004d64ed03dSBarry Smith if (header[3] < 0) { 2005a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2006d64ed03dSBarry Smith } 20076c5fab8fSBarry Smith } 2008d64ed03dSBarry Smith 2009ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 201057b952d6SSatish Balay M = header[1]; N = header[2]; 201157b952d6SSatish Balay 2012a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 201357b952d6SSatish Balay 201457b952d6SSatish Balay /* 201557b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 201657b952d6SSatish Balay divisible by the blocksize 201757b952d6SSatish Balay */ 201857b952d6SSatish Balay Mbs = M/bs; 201957b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 202057b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 202157b952d6SSatish Balay else Mbs++; 202257b952d6SSatish Balay if (extra_rows &&!rank) { 2023b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 202457b952d6SSatish Balay } 2025537820f0SBarry Smith 202657b952d6SSatish Balay /* determine ownership of all rows */ 202757b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 202857b952d6SSatish Balay m = mbs * bs; 2029cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2030cee3aa6bSSatish Balay browners = rowners + size + 1; 2031ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 203257b952d6SSatish Balay rowners[0] = 0; 2033cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2034cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 203557b952d6SSatish Balay rstart = rowners[rank]; 203657b952d6SSatish Balay rend = rowners[rank+1]; 203757b952d6SSatish Balay 203857b952d6SSatish Balay /* distribute row lengths to all processors */ 203957b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 204057b952d6SSatish Balay if (!rank) { 204157b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2042e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 204357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 204457b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2045cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2046ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 204757b952d6SSatish Balay PetscFree(sndcounts); 2048d64ed03dSBarry Smith } else { 2049ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 205057b952d6SSatish Balay } 205157b952d6SSatish Balay 205257b952d6SSatish Balay if (!rank) { 205357b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 205457b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 205557b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 205657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 205757b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 205857b952d6SSatish Balay procsnz[i] += rowlengths[j]; 205957b952d6SSatish Balay } 206057b952d6SSatish Balay } 206157b952d6SSatish Balay PetscFree(rowlengths); 206257b952d6SSatish Balay 206357b952d6SSatish Balay /* determine max buffer needed and allocate it */ 206457b952d6SSatish Balay maxnz = 0; 206557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 206657b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 206757b952d6SSatish Balay } 206857b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 206957b952d6SSatish Balay 207057b952d6SSatish Balay /* read in my part of the matrix column indices */ 207157b952d6SSatish Balay nz = procsnz[0]; 207257b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 207357b952d6SSatish Balay mycols = ibuf; 2074cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2075e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2076cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2077cee3aa6bSSatish Balay 207857b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 207957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 208057b952d6SSatish Balay nz = procsnz[i]; 2081e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2082ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 208357b952d6SSatish Balay } 208457b952d6SSatish Balay /* read in the stuff for the last proc */ 208557b952d6SSatish Balay if ( size != 1 ) { 208657b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2087e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 208857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2089ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 209057b952d6SSatish Balay } 209157b952d6SSatish Balay PetscFree(cols); 2092d64ed03dSBarry Smith } else { 209357b952d6SSatish Balay /* determine buffer space needed for message */ 209457b952d6SSatish Balay nz = 0; 209557b952d6SSatish Balay for ( i=0; i<m; i++ ) { 209657b952d6SSatish Balay nz += locrowlens[i]; 209757b952d6SSatish Balay } 209857b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 209957b952d6SSatish Balay mycols = ibuf; 210057b952d6SSatish Balay /* receive message of column indices*/ 2101ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2102ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2103a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 210457b952d6SSatish Balay } 210557b952d6SSatish Balay 210657b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2107cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2108cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 210957b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2110cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 211157b952d6SSatish Balay masked1 = mask + Mbs; 211257b952d6SSatish Balay masked2 = masked1 + Mbs; 211357b952d6SSatish Balay rowcount = 0; nzcount = 0; 211457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 211557b952d6SSatish Balay dcount = 0; 211657b952d6SSatish Balay odcount = 0; 211757b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 211857b952d6SSatish Balay kmax = locrowlens[rowcount]; 211957b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 212057b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 212157b952d6SSatish Balay if (!mask[tmp]) { 212257b952d6SSatish Balay mask[tmp] = 1; 212357b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 212457b952d6SSatish Balay else masked1[dcount++] = tmp; 212557b952d6SSatish Balay } 212657b952d6SSatish Balay } 212757b952d6SSatish Balay rowcount++; 212857b952d6SSatish Balay } 2129cee3aa6bSSatish Balay 213057b952d6SSatish Balay dlens[i] = dcount; 213157b952d6SSatish Balay odlens[i] = odcount; 2132cee3aa6bSSatish Balay 213357b952d6SSatish Balay /* zero out the mask elements we set */ 213457b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 213557b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 213657b952d6SSatish Balay } 2137cee3aa6bSSatish Balay 213857b952d6SSatish Balay /* create our matrix */ 2139537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2140537820f0SBarry Smith CHKERRQ(ierr); 214157b952d6SSatish Balay A = *newmat; 21426d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 214357b952d6SSatish Balay 214457b952d6SSatish Balay if (!rank) { 214557b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 214657b952d6SSatish Balay /* read in my part of the matrix numerical values */ 214757b952d6SSatish Balay nz = procsnz[0]; 214857b952d6SSatish Balay vals = buf; 2149cee3aa6bSSatish Balay mycols = ibuf; 2150cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2151e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2152cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2153537820f0SBarry Smith 215457b952d6SSatish Balay /* insert into matrix */ 215557b952d6SSatish Balay jj = rstart*bs; 215657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 215757b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 215857b952d6SSatish Balay mycols += locrowlens[i]; 215957b952d6SSatish Balay vals += locrowlens[i]; 216057b952d6SSatish Balay jj++; 216157b952d6SSatish Balay } 216257b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 216357b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 216457b952d6SSatish Balay nz = procsnz[i]; 216557b952d6SSatish Balay vals = buf; 2166e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2167ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 216857b952d6SSatish Balay } 216957b952d6SSatish Balay /* the last proc */ 217057b952d6SSatish Balay if ( size != 1 ){ 217157b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2172cee3aa6bSSatish Balay vals = buf; 2173e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 217457b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2175ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 217657b952d6SSatish Balay } 217757b952d6SSatish Balay PetscFree(procsnz); 2178d64ed03dSBarry Smith } else { 217957b952d6SSatish Balay /* receive numeric values */ 218057b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 218157b952d6SSatish Balay 218257b952d6SSatish Balay /* receive message of values*/ 218357b952d6SSatish Balay vals = buf; 2184cee3aa6bSSatish Balay mycols = ibuf; 2185ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2186ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2187a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 218857b952d6SSatish Balay 218957b952d6SSatish Balay /* insert into matrix */ 219057b952d6SSatish Balay jj = rstart*bs; 2191cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 219257b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 219357b952d6SSatish Balay mycols += locrowlens[i]; 219457b952d6SSatish Balay vals += locrowlens[i]; 219557b952d6SSatish Balay jj++; 219657b952d6SSatish Balay } 219757b952d6SSatish Balay } 219857b952d6SSatish Balay PetscFree(locrowlens); 219957b952d6SSatish Balay PetscFree(buf); 220057b952d6SSatish Balay PetscFree(ibuf); 220157b952d6SSatish Balay PetscFree(rowners); 220257b952d6SSatish Balay PetscFree(dlens); 2203cee3aa6bSSatish Balay PetscFree(mask); 22046d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 22056d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 22063a40ed3dSBarry Smith PetscFunctionReturn(0); 220757b952d6SSatish Balay } 220857b952d6SSatish Balay 220957b952d6SSatish Balay 2210