1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*c2760754SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.99 1998/01/13 00:07:02 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 143b2fbd54SBarry Smith 15537820f0SBarry Smith /* 16537820f0SBarry Smith Local utility routine that creates a mapping from the global column 1757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1957b952d6SSatish Balay length of colmap equals the global matrix length. 2057b952d6SSatish Balay */ 215615d1e5SSatish Balay #undef __FUNC__ 225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2457b952d6SSatish Balay { 2557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 2857b952d6SSatish Balay 29d64ed03dSBarry Smith PetscFunctionBegin; 30758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3157b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3257b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 343a40ed3dSBarry Smith PetscFunctionReturn(0); 3557b952d6SSatish Balay } 3657b952d6SSatish Balay 3780c1aa95SSatish Balay #define CHUNKSIZE 10 3880c1aa95SSatish Balay 39f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 4080c1aa95SSatish Balay { \ 4180c1aa95SSatish Balay \ 4280c1aa95SSatish Balay brow = row/bs; \ 4380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 4580c1aa95SSatish Balay bcol = col/bs; \ 4680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 47ab26458aSBarry Smith low = 0; high = nrow; \ 48ab26458aSBarry Smith while (high-low > 3) { \ 49ab26458aSBarry Smith t = (low+high)/2; \ 50ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 51ab26458aSBarry Smith else low = t; \ 52ab26458aSBarry Smith } \ 53ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 5480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 5580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 5680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 57eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 58eada6651SSatish Balay else *bap = value; \ 59ac7a638eSSatish Balay goto a_noinsert; \ 6080c1aa95SSatish Balay } \ 6180c1aa95SSatish Balay } \ 6289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 63a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 6480c1aa95SSatish Balay if (nrow >= rmax) { \ 6580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 6680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 6780c1aa95SSatish Balay Scalar *new_a; \ 6880c1aa95SSatish Balay \ 69a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7089280ab3SLois Curfman McInnes \ 7180c1aa95SSatish Balay /* malloc new storage space */ \ 7280c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 7380c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 7480c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 7580c1aa95SSatish Balay new_i = new_j + new_nz; \ 7680c1aa95SSatish Balay \ 7780c1aa95SSatish Balay /* copy over old data into new slots */ \ 7880c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 7980c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 8080c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 8180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 8280c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 8380c1aa95SSatish Balay len*sizeof(int)); \ 8480c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 8580c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 8680c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 8780c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 8880c1aa95SSatish Balay /* free up old matrix storage */ \ 8980c1aa95SSatish Balay PetscFree(a->a); \ 9080c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 9180c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 9280c1aa95SSatish Balay a->singlemalloc = 1; \ 9380c1aa95SSatish Balay \ 9480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 9680c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 9780c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 9880c1aa95SSatish Balay a->reallocs++; \ 9980c1aa95SSatish Balay a->nz++; \ 10080c1aa95SSatish Balay } \ 10180c1aa95SSatish Balay N = nrow++ - 1; \ 10280c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 10380c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 10480c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 10580c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 10680c1aa95SSatish Balay } \ 10780c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 10880c1aa95SSatish Balay rp[_i] = bcol; \ 10980c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 110ac7a638eSSatish Balay a_noinsert:; \ 11180c1aa95SSatish Balay ailen[brow] = nrow; \ 11280c1aa95SSatish Balay } 11357b952d6SSatish Balay 114ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115ac7a638eSSatish Balay { \ 116ac7a638eSSatish Balay \ 117ac7a638eSSatish Balay brow = row/bs; \ 118ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 120ac7a638eSSatish Balay bcol = col/bs; \ 121ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 122ac7a638eSSatish Balay low = 0; high = nrow; \ 123ac7a638eSSatish Balay while (high-low > 3) { \ 124ac7a638eSSatish Balay t = (low+high)/2; \ 125ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 126ac7a638eSSatish Balay else low = t; \ 127ac7a638eSSatish Balay } \ 128ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 129ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 130ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 131ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 132ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 133ac7a638eSSatish Balay else *bap = value; \ 134ac7a638eSSatish Balay goto b_noinsert; \ 135ac7a638eSSatish Balay } \ 136ac7a638eSSatish Balay } \ 13789280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 138a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139ac7a638eSSatish Balay if (nrow >= rmax) { \ 140ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14174c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142ac7a638eSSatish Balay Scalar *new_a; \ 143ac7a638eSSatish Balay \ 144a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 14589280ab3SLois Curfman McInnes \ 146ac7a638eSSatish Balay /* malloc new storage space */ \ 14774c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 150ac7a638eSSatish Balay new_i = new_j + new_nz; \ 151ac7a638eSSatish Balay \ 152ac7a638eSSatish Balay /* copy over old data into new slots */ \ 153ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 15474c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158ac7a638eSSatish Balay len*sizeof(int)); \ 159ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163ac7a638eSSatish Balay /* free up old matrix storage */ \ 16474c639caSSatish Balay PetscFree(b->a); \ 16574c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 16774c639caSSatish Balay b->singlemalloc = 1; \ 168ac7a638eSSatish Balay \ 169ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 17174c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 17274c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 17374c639caSSatish Balay b->reallocs++; \ 17474c639caSSatish Balay b->nz++; \ 175ac7a638eSSatish Balay } \ 176ac7a638eSSatish Balay N = nrow++ - 1; \ 177ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 178ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 179ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 180ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183ac7a638eSSatish Balay rp[_i] = bcol; \ 184ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 185ac7a638eSSatish Balay b_noinsert:; \ 186ac7a638eSSatish Balay bilen[brow] = nrow; \ 187ac7a638eSSatish Balay } 188ac7a638eSSatish Balay 1895615d1e5SSatish Balay #undef __FUNC__ 1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 19257b952d6SSatish Balay { 19357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 19457b952d6SSatish Balay Scalar value; 1954fa0d573SSatish Balay int ierr,i,j,row,col; 1964fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1974fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1984fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 19957b952d6SSatish Balay 200eada6651SSatish Balay /* Some Variables required in the macro */ 20180c1aa95SSatish Balay Mat A = baij->A; 20280c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204ac7a638eSSatish Balay Scalar *aa=a->a; 205ac7a638eSSatish Balay 206ac7a638eSSatish Balay Mat B = baij->B; 207ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209ac7a638eSSatish Balay Scalar *ba=b->a; 210ac7a638eSSatish Balay 211ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 213ac7a638eSSatish Balay Scalar *ap,*bap; 21480c1aa95SSatish Balay 215d64ed03dSBarry Smith PetscFunctionBegin; 21657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 218a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220639f9d9dSBarry Smith #endif 22157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 22257b952d6SSatish Balay row = im[i] - rstart_orig; 22357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 22457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 22557b952d6SSatish Balay col = in[j] - cstart_orig; 22657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 22880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 22957b952d6SSatish Balay } 2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 231a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233639f9d9dSBarry Smith #endif 23457b952d6SSatish Balay else { 23557b952d6SSatish Balay if (mat->was_assembled) { 236905e6a2fSBarry Smith if (!baij->colmap) { 237905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238905e6a2fSBarry Smith } 239905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 24057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 24157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 24257b952d6SSatish Balay col = in[j]; 2439bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2449bf004c3SSatish Balay B = baij->B; 2459bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2469bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2479bf004c3SSatish Balay ba=b->a; 24857b952d6SSatish Balay } 249d64ed03dSBarry Smith } else col = in[j]; 25057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25357b952d6SSatish Balay } 25457b952d6SSatish Balay } 255d64ed03dSBarry Smith } else { 25690f02eecSBarry Smith if (roworiented && !baij->donotstash) { 25757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258d64ed03dSBarry Smith } else { 25990f02eecSBarry Smith if (!baij->donotstash) { 26057b952d6SSatish Balay row = im[i]; 26157b952d6SSatish Balay for ( j=0; j<n; j++ ) { 26257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 26357b952d6SSatish Balay } 26457b952d6SSatish Balay } 26557b952d6SSatish Balay } 26657b952d6SSatish Balay } 26790f02eecSBarry Smith } 2683a40ed3dSBarry Smith PetscFunctionReturn(0); 26957b952d6SSatish Balay } 27057b952d6SSatish Balay 271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272ab26458aSBarry Smith #undef __FUNC__ 273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275ab26458aSBarry Smith { 276ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27730793edcSSatish Balay Scalar *value,*barray=baij->barray; 278abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 279ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 280ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 281ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282ab26458aSBarry Smith 28330793edcSSatish Balay if(!barray) { 28447513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 28530793edcSSatish Balay } 28630793edcSSatish Balay 287ab26458aSBarry Smith if (roworiented) { 288ab26458aSBarry Smith stepval = (n-1)*bs; 289ab26458aSBarry Smith } else { 290ab26458aSBarry Smith stepval = (m-1)*bs; 291ab26458aSBarry Smith } 292ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 294a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295a8c6a408SBarry Smith if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296ab26458aSBarry Smith #endif 297ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 298ab26458aSBarry Smith row = im[i] - rstart; 299ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 30015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 30115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 30215b57d14SSatish Balay barray = v + i*bs2; 30315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 30415b57d14SSatish Balay barray = v + j*bs2; 30515b57d14SSatish Balay } else { /* Here a copy is required */ 306ab26458aSBarry Smith if (roworiented) { 307ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 308ab26458aSBarry Smith } else { 309ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 310abef11f7SSatish Balay } 31147513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 31247513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 31330793edcSSatish Balay *barray++ = *value++; 31447513183SBarry Smith } 31547513183SBarry Smith } 31630793edcSSatish Balay barray -=bs2; 31715b57d14SSatish Balay } 318abef11f7SSatish Balay 319abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 320abef11f7SSatish Balay col = in[j] - cstart; 32130793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322ab26458aSBarry Smith } 3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 324a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325a8c6a408SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326ab26458aSBarry Smith #endif 327ab26458aSBarry Smith else { 328ab26458aSBarry Smith if (mat->was_assembled) { 329ab26458aSBarry Smith if (!baij->colmap) { 330ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331ab26458aSBarry Smith } 332a5eb4965SSatish Balay 3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 334a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335a5eb4965SSatish Balay #endif 336a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 337ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339ab26458aSBarry Smith col = in[j]; 340ab26458aSBarry Smith } 341ab26458aSBarry Smith } 342ab26458aSBarry Smith else col = in[j]; 34330793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344ab26458aSBarry Smith } 345ab26458aSBarry Smith } 346d64ed03dSBarry Smith } else { 347ab26458aSBarry Smith if (!baij->donotstash) { 348ab26458aSBarry Smith if (roworiented ) { 349abef11f7SSatish Balay row = im[i]*bs; 350abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 351abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 352abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 353abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355abef11f7SSatish Balay } 356ab26458aSBarry Smith } 357ab26458aSBarry Smith } 358d64ed03dSBarry Smith } else { 359ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 360abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 361abef11f7SSatish Balay col = in[j]*bs; 362abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 363abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365ab26458aSBarry Smith } 366ab26458aSBarry Smith } 367ab26458aSBarry Smith } 368abef11f7SSatish Balay } 369abef11f7SSatish Balay } 370ab26458aSBarry Smith } 371ab26458aSBarry Smith } 3723a40ed3dSBarry Smith PetscFunctionReturn(0); 373ab26458aSBarry Smith } 3740bdbc534SSatish Balay #include <math.h> 3750bdbc534SSatish Balay #define HASH_KEY 0.6180339887 376*c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 377*c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 378*c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 3795615d1e5SSatish Balay #undef __FUNC__ 3800bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 3810bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 3820bdbc534SSatish Balay { 3830bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3840bdbc534SSatish Balay int ierr,i,j,row,col; 3850bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 386*c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 3870bdbc534SSatish Balay 388*c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 389*c2760754SSatish Balay double tmp; 390b9e4cc15SSatish Balay Scalar ** HD = baij->hd,value; 3910bdbc534SSatish Balay 3920bdbc534SSatish Balay 3930bdbc534SSatish Balay PetscFunctionBegin; 3940bdbc534SSatish Balay 3950bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 3960bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 3970bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 3980bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 3990bdbc534SSatish Balay #endif 4000bdbc534SSatish Balay row = im[i]; 401*c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4020bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4030bdbc534SSatish Balay col = in[j]; 4040bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4050bdbc534SSatish Balay /* Look up into the Hash Table */ 406*c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 407*c2760754SSatish Balay h1 = HASH(size,key,tmp); 4080bdbc534SSatish Balay 409*c2760754SSatish Balay 410*c2760754SSatish Balay idx = h1; 411*c2760754SSatish Balay if (HT[idx] != key) { 412*c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 413*c2760754SSatish Balay if (idx == size) { 414*c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 415*c2760754SSatish Balay if (idx == h1) { 416*c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 417*c2760754SSatish Balay } 418*c2760754SSatish Balay } 419*c2760754SSatish Balay } 420*c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 421*c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 422*c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4230bdbc534SSatish Balay break; 4240bdbc534SSatish Balay } 4250bdbc534SSatish Balay } else { 4260bdbc534SSatish Balay if (roworiented && !baij->donotstash) { 4270bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 4280bdbc534SSatish Balay } else { 4290bdbc534SSatish Balay if (!baij->donotstash) { 4300bdbc534SSatish Balay row = im[i]; 4310bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4320bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 4330bdbc534SSatish Balay } 4340bdbc534SSatish Balay } 4350bdbc534SSatish Balay } 4360bdbc534SSatish Balay } 4370bdbc534SSatish Balay } 4380bdbc534SSatish Balay PetscFunctionReturn(0); 4390bdbc534SSatish Balay } 4400bdbc534SSatish Balay 4410bdbc534SSatish Balay #undef __FUNC__ 4420bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4430bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4440bdbc534SSatish Balay { 4450bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4460bdbc534SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 4470bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 448b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 4490bdbc534SSatish Balay 450*c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 451*c2760754SSatish Balay double tmp; 452b9e4cc15SSatish Balay Scalar ** HD = baij->hd,*value,*baij_a; 4530bdbc534SSatish Balay 454d0a41580SSatish Balay PetscFunctionBegin; 455d0a41580SSatish Balay 4560bdbc534SSatish Balay if (roworiented) { 4570bdbc534SSatish Balay stepval = (n-1)*bs; 4580bdbc534SSatish Balay } else { 4590bdbc534SSatish Balay stepval = (m-1)*bs; 4600bdbc534SSatish Balay } 4610bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4620bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4630bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4640bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4650bdbc534SSatish Balay #endif 4660bdbc534SSatish Balay row = im[i]; 467*c2760754SSatish Balay if (row >= rstart && row < rend) { 4680bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4690bdbc534SSatish Balay col = in[j]; 4700bdbc534SSatish Balay 4710bdbc534SSatish Balay /* Look up into the Hash Table */ 472*c2760754SSatish Balay key = row*Nbs+col+1; 473*c2760754SSatish Balay h1 = HASH(size,key,tmp); 4740bdbc534SSatish Balay 475*c2760754SSatish Balay idx = h1; 476*c2760754SSatish Balay if (HT[idx] != key) { 477*c2760754SSatish Balay for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 478*c2760754SSatish Balay if (idx == size) { 479*c2760754SSatish Balay for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 480*c2760754SSatish Balay if (idx == h1) { 481*c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 482*c2760754SSatish Balay } 483*c2760754SSatish Balay } 484*c2760754SSatish Balay } 485*c2760754SSatish Balay baij_a = HD[idx]; 4860bdbc534SSatish Balay if (roworiented) { 487*c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 488*c2760754SSatish Balay value = v + (i*(stepval+bs)+j)*bs; 489fef45726SSatish Balay if (addv == ADD_VALUES) { 490*c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 491*c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 492fef45726SSatish Balay baij_a[jj] += *value++; 493b4cc0f5aSSatish Balay } 494b4cc0f5aSSatish Balay } 495fef45726SSatish Balay } else { 496*c2760754SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) { 497*c2760754SSatish Balay for ( jj=ii; jj<bs2; jj+=bs ) { 498fef45726SSatish Balay baij_a[jj] = *value++; 499fef45726SSatish Balay } 500fef45726SSatish Balay } 501fef45726SSatish Balay } 5020bdbc534SSatish Balay } else { 5030bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 504fef45726SSatish Balay if (addv == ADD_VALUES) { 505b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 5060bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 507fef45726SSatish Balay baij_a[jj] += *value++; 508fef45726SSatish Balay } 509fef45726SSatish Balay } 510fef45726SSatish Balay } else { 511fef45726SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 512fef45726SSatish Balay for ( jj=0; jj<bs; jj++ ) { 513fef45726SSatish Balay baij_a[jj] = *value++; 514fef45726SSatish Balay } 515b4cc0f5aSSatish Balay } 5160bdbc534SSatish Balay } 5170bdbc534SSatish Balay } 5180bdbc534SSatish Balay } 5190bdbc534SSatish Balay } else { 5200bdbc534SSatish Balay if (!baij->donotstash) { 5210bdbc534SSatish Balay if (roworiented ) { 5220bdbc534SSatish Balay row = im[i]*bs; 5230bdbc534SSatish Balay value = v + i*(stepval+bs)*bs; 5240bdbc534SSatish Balay for ( j=0; j<bs; j++,row++ ) { 5250bdbc534SSatish Balay for ( k=0; k<n; k++ ) { 5260bdbc534SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 5270bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5280bdbc534SSatish Balay } 5290bdbc534SSatish Balay } 5300bdbc534SSatish Balay } 5310bdbc534SSatish Balay } else { 5320bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5330bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 5340bdbc534SSatish Balay col = in[j]*bs; 5350bdbc534SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 5360bdbc534SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 5370bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5380bdbc534SSatish Balay } 5390bdbc534SSatish Balay } 5400bdbc534SSatish Balay } 5410bdbc534SSatish Balay } 5420bdbc534SSatish Balay } 5430bdbc534SSatish Balay } 5440bdbc534SSatish Balay } 5450bdbc534SSatish Balay PetscFunctionReturn(0); 5460bdbc534SSatish Balay } 5470bdbc534SSatish Balay #undef __FUNC__ 5485615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 549ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 550d6de1c52SSatish Balay { 551d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 552d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 553d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 554d6de1c52SSatish Balay 555d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 556a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 557a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 558d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 559d6de1c52SSatish Balay row = idxm[i] - bsrstart; 560d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 561a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 562a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 563d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 564d6de1c52SSatish Balay col = idxn[j] - bscstart; 565d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 566d64ed03dSBarry Smith } else { 567905e6a2fSBarry Smith if (!baij->colmap) { 568905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 569905e6a2fSBarry Smith } 570e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 571dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 572d9d09a02SSatish Balay else { 573dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 574d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 575d6de1c52SSatish Balay } 576d6de1c52SSatish Balay } 577d6de1c52SSatish Balay } 578d64ed03dSBarry Smith } else { 579a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 580d6de1c52SSatish Balay } 581d6de1c52SSatish Balay } 5823a40ed3dSBarry Smith PetscFunctionReturn(0); 583d6de1c52SSatish Balay } 584d6de1c52SSatish Balay 5855615d1e5SSatish Balay #undef __FUNC__ 5865615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 587ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 588d6de1c52SSatish Balay { 589d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 590d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 591acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 592d6de1c52SSatish Balay double sum = 0.0; 593d6de1c52SSatish Balay Scalar *v; 594d6de1c52SSatish Balay 595d64ed03dSBarry Smith PetscFunctionBegin; 596d6de1c52SSatish Balay if (baij->size == 1) { 597d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 598d6de1c52SSatish Balay } else { 599d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 600d6de1c52SSatish Balay v = amat->a; 601d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 6023a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 603d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 604d6de1c52SSatish Balay #else 605d6de1c52SSatish Balay sum += (*v)*(*v); v++; 606d6de1c52SSatish Balay #endif 607d6de1c52SSatish Balay } 608d6de1c52SSatish Balay v = bmat->a; 609d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 6103a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 611d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 612d6de1c52SSatish Balay #else 613d6de1c52SSatish Balay sum += (*v)*(*v); v++; 614d6de1c52SSatish Balay #endif 615d6de1c52SSatish Balay } 616ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 617d6de1c52SSatish Balay *norm = sqrt(*norm); 618d64ed03dSBarry Smith } else { 619e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 620d6de1c52SSatish Balay } 621d64ed03dSBarry Smith } 6223a40ed3dSBarry Smith PetscFunctionReturn(0); 623d6de1c52SSatish Balay } 62457b952d6SSatish Balay 6255615d1e5SSatish Balay #undef __FUNC__ 6265615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 627ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 62857b952d6SSatish Balay { 62957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 63057b952d6SSatish Balay MPI_Comm comm = mat->comm; 63157b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 63257b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 63357b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 63457b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 63557b952d6SSatish Balay InsertMode addv; 63657b952d6SSatish Balay Scalar *rvalues,*svalues; 63757b952d6SSatish Balay 638d64ed03dSBarry Smith PetscFunctionBegin; 63957b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 640ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 64157b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 642a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 64357b952d6SSatish Balay } 644e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 64557b952d6SSatish Balay 64657b952d6SSatish Balay /* first count number of contributors to each processor */ 64757b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 64857b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 64957b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 65057b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 65157b952d6SSatish Balay idx = baij->stash.idx[i]; 65257b952d6SSatish Balay for ( j=0; j<size; j++ ) { 65357b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 65457b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 65557b952d6SSatish Balay } 65657b952d6SSatish Balay } 65757b952d6SSatish Balay } 65857b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 65957b952d6SSatish Balay 66057b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 66157b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 662ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 66357b952d6SSatish Balay nreceives = work[rank]; 664ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 66557b952d6SSatish Balay nmax = work[rank]; 66657b952d6SSatish Balay PetscFree(work); 66757b952d6SSatish Balay 66857b952d6SSatish Balay /* post receives: 66957b952d6SSatish Balay 1) each message will consist of ordered pairs 67057b952d6SSatish Balay (global index,value) we store the global index as a double 67157b952d6SSatish Balay to simplify the message passing. 67257b952d6SSatish Balay 2) since we don't know how long each individual message is we 67357b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 67457b952d6SSatish Balay this is a lot of wasted space. 67557b952d6SSatish Balay 67657b952d6SSatish Balay 67757b952d6SSatish Balay This could be done better. 67857b952d6SSatish Balay */ 679f8abc2e8SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 680f8abc2e8SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 68157b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 682ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 68357b952d6SSatish Balay } 68457b952d6SSatish Balay 68557b952d6SSatish Balay /* do sends: 68657b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 68757b952d6SSatish Balay the ith processor 68857b952d6SSatish Balay */ 68957b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 690d64ed03dSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 69157b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 69257b952d6SSatish Balay starts[0] = 0; 69357b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 69457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 69557b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 69657b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 69757b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 69857b952d6SSatish Balay } 69957b952d6SSatish Balay PetscFree(owner); 70057b952d6SSatish Balay starts[0] = 0; 70157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 70257b952d6SSatish Balay count = 0; 70357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 70457b952d6SSatish Balay if (procs[i]) { 705ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 70657b952d6SSatish Balay } 70757b952d6SSatish Balay } 70857b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 70957b952d6SSatish Balay 71057b952d6SSatish Balay /* Free cache space */ 711cd1fa5fbSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 71257b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 71357b952d6SSatish Balay 71457b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 71557b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 71657b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 71757b952d6SSatish Balay baij->rmax = nmax; 71857b952d6SSatish Balay 7193a40ed3dSBarry Smith PetscFunctionReturn(0); 72057b952d6SSatish Balay } 721bd7f49f5SSatish Balay 722fef45726SSatish Balay /* 723fef45726SSatish Balay Creates the hash table, and sets the table 724fef45726SSatish Balay This table is created only once. 725fef45726SSatish Balay If new entried need to be added to the matrix 726fef45726SSatish Balay then the hash table has to be destroyed and 727fef45726SSatish Balay recreated. 728fef45726SSatish Balay */ 729fef45726SSatish Balay #undef __FUNC__ 730fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 731d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 732596b8d2eSBarry Smith { 733596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 734596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 735596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 7360bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7370bdbc534SSatish Balay int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 7380bdbc534SSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col; 739fef45726SSatish Balay int *HT,key; 7400bdbc534SSatish Balay Scalar **HD; 741*c2760754SSatish Balay double tmp; 742fef45726SSatish Balay 743d64ed03dSBarry Smith PetscFunctionBegin; 7440bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 7450bdbc534SSatish Balay size = baij->ht_size; 746fef45726SSatish Balay 7470bdbc534SSatish Balay if (baij->ht) { 7480bdbc534SSatish Balay PetscFunctionReturn(0); 749596b8d2eSBarry Smith } 7500bdbc534SSatish Balay 751fef45726SSatish Balay /* Allocate Memory for Hash Table */ 752b9e4cc15SSatish Balay baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 753b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 754b9e4cc15SSatish Balay HD = baij->hd; 755a07cd24cSSatish Balay HT = baij->ht; 756b9e4cc15SSatish Balay 757b9e4cc15SSatish Balay 758*c2760754SSatish Balay PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 7590bdbc534SSatish Balay 760596b8d2eSBarry Smith 761596b8d2eSBarry Smith /* Loop Over A */ 7620bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 763596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 7640bdbc534SSatish Balay row = i+rstart; 7650bdbc534SSatish Balay col = aj[j]+cstart; 766596b8d2eSBarry Smith 7670bdbc534SSatish Balay key = row*baij->Nbs + col + 1; 7680bdbc534SSatish Balay /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 769*c2760754SSatish Balay h1 = HASH(size,key,tmp); 7700bdbc534SSatish Balay 7710bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7720bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7730bdbc534SSatish Balay HT[(h1+k)%size] = key; 7740bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 775596b8d2eSBarry Smith break; 776596b8d2eSBarry Smith } else ct++; 777596b8d2eSBarry Smith } 778bd7f49f5SSatish Balay if (k> max1) max1 = k; 779596b8d2eSBarry Smith } 780596b8d2eSBarry Smith } 781596b8d2eSBarry Smith /* Loop Over B */ 7820bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 783596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 7840bdbc534SSatish Balay row = i+rstart; 7850bdbc534SSatish Balay col = garray[bj[j]]; 7860bdbc534SSatish Balay key = row*baij->Nbs + col + 1; 7870bdbc534SSatish Balay /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 788*c2760754SSatish Balay h1 = HASH(size,key,tmp); 7890bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7900bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7910bdbc534SSatish Balay HT[(h1+k)%size] = key; 7920bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 793596b8d2eSBarry Smith break; 794596b8d2eSBarry Smith } else ct++; 795596b8d2eSBarry Smith } 796bd7f49f5SSatish Balay if (k> max2) max2 = k; 797596b8d2eSBarry Smith } 798596b8d2eSBarry Smith } 799596b8d2eSBarry Smith 800596b8d2eSBarry Smith /* Print Summary */ 801*c2760754SSatish Balay for ( i=0,j=0; i<size; i++) 802596b8d2eSBarry Smith if (HT[i]) {j++;} 803596b8d2eSBarry Smith 8040bdbc534SSatish Balay /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 8050bdbc534SSatish Balay PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 8060bdbc534SSatish Balay fflush(stdout); 8073a40ed3dSBarry Smith PetscFunctionReturn(0); 808596b8d2eSBarry Smith } 80957b952d6SSatish Balay 8105615d1e5SSatish Balay #undef __FUNC__ 8115615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 812ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 81357b952d6SSatish Balay { 81457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 81557b952d6SSatish Balay MPI_Status *send_status,recv_status; 81657b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 817596b8d2eSBarry Smith int bs=baij->bs,row,col,other_disassembled,flg; 81857b952d6SSatish Balay Scalar *values,val; 819e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 82057b952d6SSatish Balay 821d64ed03dSBarry Smith PetscFunctionBegin; 82257b952d6SSatish Balay /* wait on receives */ 82357b952d6SSatish Balay while (count) { 824ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 82557b952d6SSatish Balay /* unpack receives into our local space */ 82657b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 827ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 82857b952d6SSatish Balay n = n/3; 82957b952d6SSatish Balay for ( i=0; i<n; i++ ) { 83057b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 83157b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 83257b952d6SSatish Balay val = values[3*i+2]; 83357b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 83457b952d6SSatish Balay col -= baij->cstart*bs; 8356fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 836d64ed03dSBarry Smith } else { 83757b952d6SSatish Balay if (mat->was_assembled) { 838905e6a2fSBarry Smith if (!baij->colmap) { 839905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 840905e6a2fSBarry Smith } 841a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 84257b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 84357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 84457b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 84557b952d6SSatish Balay } 84657b952d6SSatish Balay } 8476fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 84857b952d6SSatish Balay } 84957b952d6SSatish Balay } 85057b952d6SSatish Balay count--; 85157b952d6SSatish Balay } 85257b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 85357b952d6SSatish Balay 85457b952d6SSatish Balay /* wait on sends */ 85557b952d6SSatish Balay if (baij->nsends) { 856d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 857ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 85857b952d6SSatish Balay PetscFree(send_status); 85957b952d6SSatish Balay } 86057b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 86157b952d6SSatish Balay 86257b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 86357b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 86457b952d6SSatish Balay 86557b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 86657b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 867ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 86857b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 86957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 87057b952d6SSatish Balay } 87157b952d6SSatish Balay 8726d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 87357b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 87457b952d6SSatish Balay } 87557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 87657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 87757b952d6SSatish Balay 878596b8d2eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 87944e6884eSSatish Balay if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) { 880a07cd24cSSatish Balay double fact = 1.39; 881fef45726SSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr); 882a74b47caSSatish Balay if (fact <= 1.0) fact = 1.39; 883a74b47caSSatish Balay PLogInfo(0,"[%d]MatAssemblyEnd_MPIBAIJ:Hash table Factor used %5.2f\n",PetscGlobalRank,fact); 884fef45726SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr); 8850bdbc534SSatish Balay mat->ops.setvalues = MatSetValues_MPIBAIJ_HT; 8860bdbc534SSatish Balay mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 887bd7f49f5SSatish Balay } 88857b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 8893a40ed3dSBarry Smith PetscFunctionReturn(0); 89057b952d6SSatish Balay } 89157b952d6SSatish Balay 8925615d1e5SSatish Balay #undef __FUNC__ 8935615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 89457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 89557b952d6SSatish Balay { 89657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 89757b952d6SSatish Balay int ierr; 89857b952d6SSatish Balay 899d64ed03dSBarry Smith PetscFunctionBegin; 90057b952d6SSatish Balay if (baij->size == 1) { 90157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 902a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 9033a40ed3dSBarry Smith PetscFunctionReturn(0); 90457b952d6SSatish Balay } 90557b952d6SSatish Balay 9065615d1e5SSatish Balay #undef __FUNC__ 9075615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 90857b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 90957b952d6SSatish Balay { 91057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 911cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 91257b952d6SSatish Balay FILE *fd; 91357b952d6SSatish Balay ViewerType vtype; 91457b952d6SSatish Balay 915d64ed03dSBarry Smith PetscFunctionBegin; 91657b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 91757b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 91857b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 919639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 9204e220ebcSLois Curfman McInnes MatInfo info; 92157b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 92257b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 9234e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 92457b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 92557b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 9264e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 9274e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 9284e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 9294e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 9304e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 9314e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 93257b952d6SSatish Balay fflush(fd); 93357b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 93457b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 9353a40ed3dSBarry Smith PetscFunctionReturn(0); 936d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 937bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 9383a40ed3dSBarry Smith PetscFunctionReturn(0); 93957b952d6SSatish Balay } 94057b952d6SSatish Balay } 94157b952d6SSatish Balay 94257b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 94357b952d6SSatish Balay Draw draw; 94457b952d6SSatish Balay PetscTruth isnull; 94557b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 9463a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 94757b952d6SSatish Balay } 94857b952d6SSatish Balay 94957b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 95057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 95157b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 95257b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 95357b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 95457b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 95557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 95657b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 95757b952d6SSatish Balay fflush(fd); 95857b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 959d64ed03dSBarry Smith } else { 96057b952d6SSatish Balay int size = baij->size; 96157b952d6SSatish Balay rank = baij->rank; 96257b952d6SSatish Balay if (size == 1) { 96357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 964d64ed03dSBarry Smith } else { 96557b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 96657b952d6SSatish Balay Mat A; 96757b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 96857b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 96957b952d6SSatish Balay int mbs=baij->mbs; 97057b952d6SSatish Balay Scalar *a; 97157b952d6SSatish Balay 97257b952d6SSatish Balay if (!rank) { 97355843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 974d64ed03dSBarry Smith } else { 97555843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 97657b952d6SSatish Balay } 97757b952d6SSatish Balay PLogObjectParent(mat,A); 97857b952d6SSatish Balay 97957b952d6SSatish Balay /* copy over the A part */ 98057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 98157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 98257b952d6SSatish Balay row = baij->rstart; 98357b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 98457b952d6SSatish Balay 98557b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 98657b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 98757b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 98857b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 98957b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 99057b952d6SSatish Balay for (k=0; k<bs; k++ ) { 991cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 992cee3aa6bSSatish Balay col++; a += bs; 99357b952d6SSatish Balay } 99457b952d6SSatish Balay } 99557b952d6SSatish Balay } 99657b952d6SSatish Balay /* copy over the B part */ 99757b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 99857b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 99957b952d6SSatish Balay row = baij->rstart*bs; 100057b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 100157b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 100257b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 100357b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 100457b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 100557b952d6SSatish Balay for (k=0; k<bs; k++ ) { 1006cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1007cee3aa6bSSatish Balay col++; a += bs; 100857b952d6SSatish Balay } 100957b952d6SSatish Balay } 101057b952d6SSatish Balay } 101157b952d6SSatish Balay PetscFree(rvals); 10126d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 10136d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 101455843e3eSBarry Smith /* 101555843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 101655843e3eSBarry Smith synchronized across all processors that share the Draw object 101755843e3eSBarry Smith */ 101855843e3eSBarry Smith if (!rank || vtype == DRAW_VIEWER) { 101957b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 102057b952d6SSatish Balay } 102157b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 102257b952d6SSatish Balay } 102357b952d6SSatish Balay } 10243a40ed3dSBarry Smith PetscFunctionReturn(0); 102557b952d6SSatish Balay } 102657b952d6SSatish Balay 102757b952d6SSatish Balay 102857b952d6SSatish Balay 10295615d1e5SSatish Balay #undef __FUNC__ 10305615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 1031ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 103257b952d6SSatish Balay { 103357b952d6SSatish Balay Mat mat = (Mat) obj; 103457b952d6SSatish Balay int ierr; 103557b952d6SSatish Balay ViewerType vtype; 103657b952d6SSatish Balay 1037d64ed03dSBarry Smith PetscFunctionBegin; 103857b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 103957b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 104057b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 104157b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 10423a40ed3dSBarry Smith } else if (vtype == BINARY_FILE_VIEWER) { 10433a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 104457b952d6SSatish Balay } 10453a40ed3dSBarry Smith PetscFunctionReturn(0); 104657b952d6SSatish Balay } 104757b952d6SSatish Balay 10485615d1e5SSatish Balay #undef __FUNC__ 10495615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1050ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 105179bdfe76SSatish Balay { 105279bdfe76SSatish Balay Mat mat = (Mat) obj; 105379bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 105479bdfe76SSatish Balay int ierr; 105579bdfe76SSatish Balay 1056d64ed03dSBarry Smith PetscFunctionBegin; 10573a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 105879bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 105979bdfe76SSatish Balay #endif 106079bdfe76SSatish Balay 106183e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 106279bdfe76SSatish Balay PetscFree(baij->rowners); 106379bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 106479bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 106579bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 106679bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 106779bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 106879bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 106979bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 107030793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 1071b9e4cc15SSatish Balay if (baij->hd) PetscFree(baij->hd); 107279bdfe76SSatish Balay PetscFree(baij); 107379bdfe76SSatish Balay PLogObjectDestroy(mat); 107479bdfe76SSatish Balay PetscHeaderDestroy(mat); 10753a40ed3dSBarry Smith PetscFunctionReturn(0); 107679bdfe76SSatish Balay } 107779bdfe76SSatish Balay 10785615d1e5SSatish Balay #undef __FUNC__ 10795615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1080ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1081cee3aa6bSSatish Balay { 1082cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 108347b4a8eaSLois Curfman McInnes int ierr, nt; 1084cee3aa6bSSatish Balay 1085d64ed03dSBarry Smith PetscFunctionBegin; 1086c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 108747b4a8eaSLois Curfman McInnes if (nt != a->n) { 1088a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 108947b4a8eaSLois Curfman McInnes } 1090c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 109147b4a8eaSLois Curfman McInnes if (nt != a->m) { 1092a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 109347b4a8eaSLois Curfman McInnes } 109443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1095cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 109643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1097cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 109843a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 10993a40ed3dSBarry Smith PetscFunctionReturn(0); 1100cee3aa6bSSatish Balay } 1101cee3aa6bSSatish Balay 11025615d1e5SSatish Balay #undef __FUNC__ 11035615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1104ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1105cee3aa6bSSatish Balay { 1106cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1107cee3aa6bSSatish Balay int ierr; 1108d64ed03dSBarry Smith 1109d64ed03dSBarry Smith PetscFunctionBegin; 111043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1111cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 111243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1113cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 11143a40ed3dSBarry Smith PetscFunctionReturn(0); 1115cee3aa6bSSatish Balay } 1116cee3aa6bSSatish Balay 11175615d1e5SSatish Balay #undef __FUNC__ 11185615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1119ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1120cee3aa6bSSatish Balay { 1121cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1122cee3aa6bSSatish Balay int ierr; 1123cee3aa6bSSatish Balay 1124d64ed03dSBarry Smith PetscFunctionBegin; 1125cee3aa6bSSatish Balay /* do nondiagonal part */ 1126cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1127cee3aa6bSSatish Balay /* send it on its way */ 1128537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1129cee3aa6bSSatish Balay /* do local part */ 1130cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1131cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1132cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1133cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1134639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 11353a40ed3dSBarry Smith PetscFunctionReturn(0); 1136cee3aa6bSSatish Balay } 1137cee3aa6bSSatish Balay 11385615d1e5SSatish Balay #undef __FUNC__ 11395615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1140ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1141cee3aa6bSSatish Balay { 1142cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1143cee3aa6bSSatish Balay int ierr; 1144cee3aa6bSSatish Balay 1145d64ed03dSBarry Smith PetscFunctionBegin; 1146cee3aa6bSSatish Balay /* do nondiagonal part */ 1147cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1148cee3aa6bSSatish Balay /* send it on its way */ 1149537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1150cee3aa6bSSatish Balay /* do local part */ 1151cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1152cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1153cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1154cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1155537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 11563a40ed3dSBarry Smith PetscFunctionReturn(0); 1157cee3aa6bSSatish Balay } 1158cee3aa6bSSatish Balay 1159cee3aa6bSSatish Balay /* 1160cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1161cee3aa6bSSatish Balay diagonal block 1162cee3aa6bSSatish Balay */ 11635615d1e5SSatish Balay #undef __FUNC__ 11645615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1165ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1166cee3aa6bSSatish Balay { 1167cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 11683a40ed3dSBarry Smith int ierr; 1169d64ed03dSBarry Smith 1170d64ed03dSBarry Smith PetscFunctionBegin; 1171a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 11723a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 11733a40ed3dSBarry Smith PetscFunctionReturn(0); 1174cee3aa6bSSatish Balay } 1175cee3aa6bSSatish Balay 11765615d1e5SSatish Balay #undef __FUNC__ 11775615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1178ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1179cee3aa6bSSatish Balay { 1180cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1181cee3aa6bSSatish Balay int ierr; 1182d64ed03dSBarry Smith 1183d64ed03dSBarry Smith PetscFunctionBegin; 1184cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1185cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 11863a40ed3dSBarry Smith PetscFunctionReturn(0); 1187cee3aa6bSSatish Balay } 1188026e39d0SSatish Balay 11895615d1e5SSatish Balay #undef __FUNC__ 11905615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1191ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 119257b952d6SSatish Balay { 119357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1194d64ed03dSBarry Smith 1195d64ed03dSBarry Smith PetscFunctionBegin; 1196bd7f49f5SSatish Balay if (m) *m = mat->M; 1197bd7f49f5SSatish Balay if (n) *n = mat->N; 11983a40ed3dSBarry Smith PetscFunctionReturn(0); 119957b952d6SSatish Balay } 120057b952d6SSatish Balay 12015615d1e5SSatish Balay #undef __FUNC__ 12025615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1203ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 120457b952d6SSatish Balay { 120557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1206d64ed03dSBarry Smith 1207d64ed03dSBarry Smith PetscFunctionBegin; 120857b952d6SSatish Balay *m = mat->m; *n = mat->N; 12093a40ed3dSBarry Smith PetscFunctionReturn(0); 121057b952d6SSatish Balay } 121157b952d6SSatish Balay 12125615d1e5SSatish Balay #undef __FUNC__ 12135615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1214ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 121557b952d6SSatish Balay { 121657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1217d64ed03dSBarry Smith 1218d64ed03dSBarry Smith PetscFunctionBegin; 121957b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 12203a40ed3dSBarry Smith PetscFunctionReturn(0); 122157b952d6SSatish Balay } 122257b952d6SSatish Balay 1223acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1224acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1225acdf5bf4SSatish Balay 12265615d1e5SSatish Balay #undef __FUNC__ 12275615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1228acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1229acdf5bf4SSatish Balay { 1230acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1231acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1232acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1233d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1234d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1235acdf5bf4SSatish Balay 1236d64ed03dSBarry Smith PetscFunctionBegin; 1237a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1238acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1239acdf5bf4SSatish Balay 1240acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1241acdf5bf4SSatish Balay /* 1242acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1243acdf5bf4SSatish Balay */ 1244acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1245bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1246bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1247acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1248acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1249acdf5bf4SSatish Balay } 1250acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1251acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1252acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1253acdf5bf4SSatish Balay } 1254acdf5bf4SSatish Balay 1255a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1256d9d09a02SSatish Balay lrow = row - brstart; 1257acdf5bf4SSatish Balay 1258acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1259acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1260acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1261acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1262acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1263acdf5bf4SSatish Balay nztot = nzA + nzB; 1264acdf5bf4SSatish Balay 1265acdf5bf4SSatish Balay cmap = mat->garray; 1266acdf5bf4SSatish Balay if (v || idx) { 1267acdf5bf4SSatish Balay if (nztot) { 1268acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1269acdf5bf4SSatish Balay int imark = -1; 1270acdf5bf4SSatish Balay if (v) { 1271acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1272acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1273d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1274acdf5bf4SSatish Balay else break; 1275acdf5bf4SSatish Balay } 1276acdf5bf4SSatish Balay imark = i; 1277acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1278acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1279acdf5bf4SSatish Balay } 1280acdf5bf4SSatish Balay if (idx) { 1281acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1282acdf5bf4SSatish Balay if (imark > -1) { 1283acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1284bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1285acdf5bf4SSatish Balay } 1286acdf5bf4SSatish Balay } else { 1287acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1288d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1289d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1290acdf5bf4SSatish Balay else break; 1291acdf5bf4SSatish Balay } 1292acdf5bf4SSatish Balay imark = i; 1293acdf5bf4SSatish Balay } 1294d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1295d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1296acdf5bf4SSatish Balay } 1297d64ed03dSBarry Smith } else { 1298d212a18eSSatish Balay if (idx) *idx = 0; 1299d212a18eSSatish Balay if (v) *v = 0; 1300d212a18eSSatish Balay } 1301acdf5bf4SSatish Balay } 1302acdf5bf4SSatish Balay *nz = nztot; 1303acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1304acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 13053a40ed3dSBarry Smith PetscFunctionReturn(0); 1306acdf5bf4SSatish Balay } 1307acdf5bf4SSatish Balay 13085615d1e5SSatish Balay #undef __FUNC__ 13095615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1310acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1311acdf5bf4SSatish Balay { 1312acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1313d64ed03dSBarry Smith 1314d64ed03dSBarry Smith PetscFunctionBegin; 1315acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1316a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1317acdf5bf4SSatish Balay } 1318acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 13193a40ed3dSBarry Smith PetscFunctionReturn(0); 1320acdf5bf4SSatish Balay } 1321acdf5bf4SSatish Balay 13225615d1e5SSatish Balay #undef __FUNC__ 13235615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1324ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 13255a838052SSatish Balay { 13265a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1327d64ed03dSBarry Smith 1328d64ed03dSBarry Smith PetscFunctionBegin; 13295a838052SSatish Balay *bs = baij->bs; 13303a40ed3dSBarry Smith PetscFunctionReturn(0); 13315a838052SSatish Balay } 13325a838052SSatish Balay 13335615d1e5SSatish Balay #undef __FUNC__ 13345615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1335ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 133658667388SSatish Balay { 133758667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 133858667388SSatish Balay int ierr; 1339d64ed03dSBarry Smith 1340d64ed03dSBarry Smith PetscFunctionBegin; 134158667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 134258667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 13433a40ed3dSBarry Smith PetscFunctionReturn(0); 134458667388SSatish Balay } 13450ac07820SSatish Balay 13465615d1e5SSatish Balay #undef __FUNC__ 13475615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1348ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 13490ac07820SSatish Balay { 13504e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 13514e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 13527d57db60SLois Curfman McInnes int ierr; 13537d57db60SLois Curfman McInnes double isend[5], irecv[5]; 13540ac07820SSatish Balay 1355d64ed03dSBarry Smith PetscFunctionBegin; 13564e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 13574e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 13584e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 13594e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 13604e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 13610ac07820SSatish Balay if (flag == MAT_LOCAL) { 13624e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 13634e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 13644e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 13654e220ebcSLois Curfman McInnes info->memory = isend[3]; 13664e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 13670ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1368ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 13694e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13704e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13714e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13724e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13734e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13740ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1375ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 13764e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13774e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13784e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13794e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13804e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13810ac07820SSatish Balay } 13825a5d4f66SBarry Smith info->rows_global = (double)a->M; 13835a5d4f66SBarry Smith info->columns_global = (double)a->N; 13845a5d4f66SBarry Smith info->rows_local = (double)a->m; 13855a5d4f66SBarry Smith info->columns_local = (double)a->N; 13864e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 13874e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 13884e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 13893a40ed3dSBarry Smith PetscFunctionReturn(0); 13900ac07820SSatish Balay } 13910ac07820SSatish Balay 13925615d1e5SSatish Balay #undef __FUNC__ 13935615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1394ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 139558667388SSatish Balay { 139658667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 139758667388SSatish Balay 1398d64ed03dSBarry Smith PetscFunctionBegin; 139958667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 140058667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 14016da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1402c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 140396854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1404c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1405b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1406b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1407b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1408aeafbbfcSLois Curfman McInnes a->roworiented = 1; 140958667388SSatish Balay MatSetOption(a->A,op); 141058667388SSatish Balay MatSetOption(a->B,op); 1411b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 14126da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 141358667388SSatish Balay op == MAT_SYMMETRIC || 141458667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 141558667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 141658667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 141758667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 141858667388SSatish Balay a->roworiented = 0; 141958667388SSatish Balay MatSetOption(a->A,op); 142058667388SSatish Balay MatSetOption(a->B,op); 14212b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 142290f02eecSBarry Smith a->donotstash = 1; 1423d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1424d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1425d64ed03dSBarry Smith } else { 1426d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1427d64ed03dSBarry Smith } 14283a40ed3dSBarry Smith PetscFunctionReturn(0); 142958667388SSatish Balay } 143058667388SSatish Balay 14315615d1e5SSatish Balay #undef __FUNC__ 14325615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1433ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 14340ac07820SSatish Balay { 14350ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 14360ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 14370ac07820SSatish Balay Mat B; 14380ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 14390ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 14400ac07820SSatish Balay Scalar *a; 14410ac07820SSatish Balay 1442d64ed03dSBarry Smith PetscFunctionBegin; 1443a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 14440ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 14450ac07820SSatish Balay CHKERRQ(ierr); 14460ac07820SSatish Balay 14470ac07820SSatish Balay /* copy over the A part */ 14480ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 14490ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14500ac07820SSatish Balay row = baij->rstart; 14510ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 14520ac07820SSatish Balay 14530ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14540ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14550ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14560ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14570ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 14580ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14590ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14600ac07820SSatish Balay col++; a += bs; 14610ac07820SSatish Balay } 14620ac07820SSatish Balay } 14630ac07820SSatish Balay } 14640ac07820SSatish Balay /* copy over the B part */ 14650ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 14660ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14670ac07820SSatish Balay row = baij->rstart*bs; 14680ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14690ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14700ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14710ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14720ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 14730ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14740ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14750ac07820SSatish Balay col++; a += bs; 14760ac07820SSatish Balay } 14770ac07820SSatish Balay } 14780ac07820SSatish Balay } 14790ac07820SSatish Balay PetscFree(rvals); 14800ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14810ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14820ac07820SSatish Balay 14830ac07820SSatish Balay if (matout != PETSC_NULL) { 14840ac07820SSatish Balay *matout = B; 14850ac07820SSatish Balay } else { 14860ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 14870ac07820SSatish Balay PetscFree(baij->rowners); 14880ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 14890ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 14900ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 14910ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 14920ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 14930ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 14940ac07820SSatish Balay PetscFree(baij); 1495f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 14960ac07820SSatish Balay PetscHeaderDestroy(B); 14970ac07820SSatish Balay } 14983a40ed3dSBarry Smith PetscFunctionReturn(0); 14990ac07820SSatish Balay } 15000e95ebc0SSatish Balay 15015615d1e5SSatish Balay #undef __FUNC__ 15025615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 15030e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 15040e95ebc0SSatish Balay { 15050e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 15060e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 15070e95ebc0SSatish Balay int ierr,s1,s2,s3; 15080e95ebc0SSatish Balay 1509d64ed03dSBarry Smith PetscFunctionBegin; 15100e95ebc0SSatish Balay if (ll) { 15110e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 15120e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1513a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 15140e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 15150e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 15160e95ebc0SSatish Balay } 1517a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 15183a40ed3dSBarry Smith PetscFunctionReturn(0); 15190e95ebc0SSatish Balay } 15200e95ebc0SSatish Balay 15215615d1e5SSatish Balay #undef __FUNC__ 15225615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1523ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 15240ac07820SSatish Balay { 15250ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 15260ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1527a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 15280ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 15290ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1530a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 15310ac07820SSatish Balay MPI_Comm comm = A->comm; 15320ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 15330ac07820SSatish Balay MPI_Status recv_status,*send_status; 15340ac07820SSatish Balay IS istmp; 15350ac07820SSatish Balay 1536d64ed03dSBarry Smith PetscFunctionBegin; 15370ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 15380ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 15390ac07820SSatish Balay 15400ac07820SSatish Balay /* first count number of contributors to each processor */ 15410ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 15420ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 15430ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 15440ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15450ac07820SSatish Balay idx = rows[i]; 15460ac07820SSatish Balay found = 0; 15470ac07820SSatish Balay for ( j=0; j<size; j++ ) { 15480ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 15490ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 15500ac07820SSatish Balay } 15510ac07820SSatish Balay } 1552a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 15530ac07820SSatish Balay } 15540ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 15550ac07820SSatish Balay 15560ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 15570ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1558ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 15590ac07820SSatish Balay nrecvs = work[rank]; 1560ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 15610ac07820SSatish Balay nmax = work[rank]; 15620ac07820SSatish Balay PetscFree(work); 15630ac07820SSatish Balay 15640ac07820SSatish Balay /* post receives: */ 1565d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1566d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 15670ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1568ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 15690ac07820SSatish Balay } 15700ac07820SSatish Balay 15710ac07820SSatish Balay /* do sends: 15720ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 15730ac07820SSatish Balay the ith processor 15740ac07820SSatish Balay */ 15750ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1576ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 15770ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 15780ac07820SSatish Balay starts[0] = 0; 15790ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15800ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15810ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 15820ac07820SSatish Balay } 15830ac07820SSatish Balay ISRestoreIndices(is,&rows); 15840ac07820SSatish Balay 15850ac07820SSatish Balay starts[0] = 0; 15860ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15870ac07820SSatish Balay count = 0; 15880ac07820SSatish Balay for ( i=0; i<size; i++ ) { 15890ac07820SSatish Balay if (procs[i]) { 1590ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 15910ac07820SSatish Balay } 15920ac07820SSatish Balay } 15930ac07820SSatish Balay PetscFree(starts); 15940ac07820SSatish Balay 15950ac07820SSatish Balay base = owners[rank]*bs; 15960ac07820SSatish Balay 15970ac07820SSatish Balay /* wait on receives */ 15980ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 15990ac07820SSatish Balay source = lens + nrecvs; 16000ac07820SSatish Balay count = nrecvs; slen = 0; 16010ac07820SSatish Balay while (count) { 1602ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 16030ac07820SSatish Balay /* unpack receives into our local space */ 1604ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 16050ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 16060ac07820SSatish Balay lens[imdex] = n; 16070ac07820SSatish Balay slen += n; 16080ac07820SSatish Balay count--; 16090ac07820SSatish Balay } 16100ac07820SSatish Balay PetscFree(recv_waits); 16110ac07820SSatish Balay 16120ac07820SSatish Balay /* move the data into the send scatter */ 16130ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 16140ac07820SSatish Balay count = 0; 16150ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 16160ac07820SSatish Balay values = rvalues + i*nmax; 16170ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 16180ac07820SSatish Balay lrows[count++] = values[j] - base; 16190ac07820SSatish Balay } 16200ac07820SSatish Balay } 16210ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 16220ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 16230ac07820SSatish Balay 16240ac07820SSatish Balay /* actually zap the local rows */ 1625029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 16260ac07820SSatish Balay PLogObjectParent(A,istmp); 1627a07cd24cSSatish Balay 1628a07cd24cSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 16290ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 16300ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 16310ac07820SSatish Balay 1632a07cd24cSSatish Balay if (diag) { 1633a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1634a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1635a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1636a07cd24cSSatish Balay } 1637a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1638a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1639a07cd24cSSatish Balay } 1640a07cd24cSSatish Balay PetscFree(lrows); 1641a07cd24cSSatish Balay 16420ac07820SSatish Balay /* wait on sends */ 16430ac07820SSatish Balay if (nsends) { 1644d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1645ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 16460ac07820SSatish Balay PetscFree(send_status); 16470ac07820SSatish Balay } 16480ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 16490ac07820SSatish Balay 16503a40ed3dSBarry Smith PetscFunctionReturn(0); 16510ac07820SSatish Balay } 1652ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 16535615d1e5SSatish Balay #undef __FUNC__ 16545615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1655ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1656ba4ca20aSSatish Balay { 1657ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 16583a40ed3dSBarry Smith int ierr; 1659ba4ca20aSSatish Balay 1660d64ed03dSBarry Smith PetscFunctionBegin; 16613a40ed3dSBarry Smith if (!a->rank) { 16623a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 16633a40ed3dSBarry Smith } 16643a40ed3dSBarry Smith PetscFunctionReturn(0); 1665ba4ca20aSSatish Balay } 16660ac07820SSatish Balay 16675615d1e5SSatish Balay #undef __FUNC__ 16685615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1669ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1670bb5a7306SBarry Smith { 1671bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1672bb5a7306SBarry Smith int ierr; 1673d64ed03dSBarry Smith 1674d64ed03dSBarry Smith PetscFunctionBegin; 1675bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 16763a40ed3dSBarry Smith PetscFunctionReturn(0); 1677bb5a7306SBarry Smith } 1678bb5a7306SBarry Smith 16790ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 16800ac07820SSatish Balay 168179bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 168279bdfe76SSatish Balay static struct _MatOps MatOps = { 1683bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 16844c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 16854c50302cSBarry Smith 0,0,0,0, 16860ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 16870e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 168858667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 16894c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 16904c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 16914c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 169294a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1693d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1694ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1695bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1696ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 169779bdfe76SSatish Balay 169879bdfe76SSatish Balay 16995615d1e5SSatish Balay #undef __FUNC__ 17005615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 170179bdfe76SSatish Balay /*@C 170279bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 170379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 170479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 170579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 170679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 170779bdfe76SSatish Balay 170879bdfe76SSatish Balay Input Parameters: 170979bdfe76SSatish Balay . comm - MPI communicator 171079bdfe76SSatish Balay . bs - size of blockk 171179bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 171292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 171392e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 171492e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 171592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 171692e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 171779bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 171892e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 171979bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 172079bdfe76SSatish Balay submatrix (same for all local rows) 172192e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 172292e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 172392e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 172492e8d321SLois Curfman McInnes it is zero. 172592e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 172679bdfe76SSatish Balay submatrix (same for all local rows). 172792e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 172892e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 172992e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 173079bdfe76SSatish Balay 173179bdfe76SSatish Balay Output Parameter: 173279bdfe76SSatish Balay . A - the matrix 173379bdfe76SSatish Balay 173479bdfe76SSatish Balay Notes: 173579bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 173679bdfe76SSatish Balay (possibly both). 173779bdfe76SSatish Balay 173879bdfe76SSatish Balay Storage Information: 173979bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 174079bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 174179bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 174279bdfe76SSatish Balay local matrix (a rectangular submatrix). 174379bdfe76SSatish Balay 174479bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 174579bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 174679bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 174779bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 174879bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 174979bdfe76SSatish Balay 175079bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 175179bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 175279bdfe76SSatish Balay 175379bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 175479bdfe76SSatish Balay $ ------------------- 175579bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 175679bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 175779bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 175879bdfe76SSatish Balay $ ------------------- 175979bdfe76SSatish Balay $ 176079bdfe76SSatish Balay 176179bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 176279bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 176379bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 176457b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 176579bdfe76SSatish Balay 1766d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1767d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 176879bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 176992e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 177092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 17716da5968aSLois Curfman McInnes matrices. 177279bdfe76SSatish Balay 177392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 177479bdfe76SSatish Balay 177579bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 177679bdfe76SSatish Balay @*/ 177779bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 177879bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 177979bdfe76SSatish Balay { 178079bdfe76SSatish Balay Mat B; 178179bdfe76SSatish Balay Mat_MPIBAIJ *b; 17824c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 178379bdfe76SSatish Balay 1784d64ed03dSBarry Smith PetscFunctionBegin; 1785a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 17863914022bSBarry Smith 17873914022bSBarry Smith MPI_Comm_size(comm,&size); 17883914022bSBarry Smith if (size == 1) { 17893914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 17903914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 17913914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 17923a40ed3dSBarry Smith PetscFunctionReturn(0); 17933914022bSBarry Smith } 17943914022bSBarry Smith 179579bdfe76SSatish Balay *A = 0; 1796d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 179779bdfe76SSatish Balay PLogObjectCreate(B); 179879bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 179979bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 180079bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 18014c50302cSBarry Smith 180279bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 180379bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 180490f02eecSBarry Smith B->mapping = 0; 180579bdfe76SSatish Balay B->factor = 0; 180679bdfe76SSatish Balay B->assembled = PETSC_FALSE; 180779bdfe76SSatish Balay 1808e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 180979bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 181079bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 181179bdfe76SSatish Balay 1812d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1813a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1814d64ed03dSBarry Smith } 1815a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1816a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1817a8c6a408SBarry Smith } 1818a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1819a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1820a8c6a408SBarry Smith } 1821cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1822cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 182379bdfe76SSatish Balay 182479bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 182579bdfe76SSatish Balay work[0] = m; work[1] = n; 182679bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 1827ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 182879bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 182979bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 183079bdfe76SSatish Balay } 183179bdfe76SSatish Balay if (m == PETSC_DECIDE) { 183279bdfe76SSatish Balay Mbs = M/bs; 1833a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 183479bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 183579bdfe76SSatish Balay m = mbs*bs; 183679bdfe76SSatish Balay } 183779bdfe76SSatish Balay if (n == PETSC_DECIDE) { 183879bdfe76SSatish Balay Nbs = N/bs; 1839a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 184079bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 184179bdfe76SSatish Balay n = nbs*bs; 184279bdfe76SSatish Balay } 1843a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 1844a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1845a8c6a408SBarry Smith } 184679bdfe76SSatish Balay 184779bdfe76SSatish Balay b->m = m; B->m = m; 184879bdfe76SSatish Balay b->n = n; B->n = n; 184979bdfe76SSatish Balay b->N = N; B->N = N; 185079bdfe76SSatish Balay b->M = M; B->M = M; 185179bdfe76SSatish Balay b->bs = bs; 185279bdfe76SSatish Balay b->bs2 = bs*bs; 185379bdfe76SSatish Balay b->mbs = mbs; 185479bdfe76SSatish Balay b->nbs = nbs; 185579bdfe76SSatish Balay b->Mbs = Mbs; 185679bdfe76SSatish Balay b->Nbs = Nbs; 185779bdfe76SSatish Balay 185879bdfe76SSatish Balay /* build local table of row and column ownerships */ 185979bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1860f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 18610ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 1862ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 186379bdfe76SSatish Balay b->rowners[0] = 0; 186479bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 186579bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 186679bdfe76SSatish Balay } 186779bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 186879bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 18694fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 18704fa0d573SSatish Balay b->rend_bs = b->rend * bs; 18714fa0d573SSatish Balay 1872ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 187379bdfe76SSatish Balay b->cowners[0] = 0; 187479bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 187579bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 187679bdfe76SSatish Balay } 187779bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 187879bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 18794fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 18804fa0d573SSatish Balay b->cend_bs = b->cend * bs; 188179bdfe76SSatish Balay 1882a07cd24cSSatish Balay 188379bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1884029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 188579bdfe76SSatish Balay PLogObjectParent(B,b->A); 188679bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1887029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 188879bdfe76SSatish Balay PLogObjectParent(B,b->B); 188979bdfe76SSatish Balay 189079bdfe76SSatish Balay /* build cache for off array entries formed */ 189179bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 189290f02eecSBarry Smith b->donotstash = 0; 189379bdfe76SSatish Balay b->colmap = 0; 189479bdfe76SSatish Balay b->garray = 0; 189579bdfe76SSatish Balay b->roworiented = 1; 189679bdfe76SSatish Balay 189730793edcSSatish Balay /* stuff used in block assembly */ 189830793edcSSatish Balay b->barray = 0; 189930793edcSSatish Balay 190079bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 190179bdfe76SSatish Balay b->lvec = 0; 190279bdfe76SSatish Balay b->Mvctx = 0; 190379bdfe76SSatish Balay 190479bdfe76SSatish Balay /* stuff for MatGetRow() */ 190579bdfe76SSatish Balay b->rowindices = 0; 190679bdfe76SSatish Balay b->rowvalues = 0; 190779bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 190879bdfe76SSatish Balay 1909a07cd24cSSatish Balay /* hash table stuff */ 1910a07cd24cSSatish Balay b->ht = 0; 19110bdbc534SSatish Balay b->ht_size = 0; 1912a07cd24cSSatish Balay 191379bdfe76SSatish Balay *A = B; 19143a40ed3dSBarry Smith PetscFunctionReturn(0); 191579bdfe76SSatish Balay } 1916026e39d0SSatish Balay 19175615d1e5SSatish Balay #undef __FUNC__ 19185615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 19190ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 19200ac07820SSatish Balay { 19210ac07820SSatish Balay Mat mat; 19220ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 19230ac07820SSatish Balay int ierr, len=0, flg; 19240ac07820SSatish Balay 1925d64ed03dSBarry Smith PetscFunctionBegin; 19260ac07820SSatish Balay *newmat = 0; 1927d4bb536fSBarry Smith PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 19280ac07820SSatish Balay PLogObjectCreate(mat); 19290ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 19300ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 19310ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 19320ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 19330ac07820SSatish Balay mat->factor = matin->factor; 19340ac07820SSatish Balay mat->assembled = PETSC_TRUE; 19350ac07820SSatish Balay 19360ac07820SSatish Balay a->m = mat->m = oldmat->m; 19370ac07820SSatish Balay a->n = mat->n = oldmat->n; 19380ac07820SSatish Balay a->M = mat->M = oldmat->M; 19390ac07820SSatish Balay a->N = mat->N = oldmat->N; 19400ac07820SSatish Balay 19410ac07820SSatish Balay a->bs = oldmat->bs; 19420ac07820SSatish Balay a->bs2 = oldmat->bs2; 19430ac07820SSatish Balay a->mbs = oldmat->mbs; 19440ac07820SSatish Balay a->nbs = oldmat->nbs; 19450ac07820SSatish Balay a->Mbs = oldmat->Mbs; 19460ac07820SSatish Balay a->Nbs = oldmat->Nbs; 19470ac07820SSatish Balay 19480ac07820SSatish Balay a->rstart = oldmat->rstart; 19490ac07820SSatish Balay a->rend = oldmat->rend; 19500ac07820SSatish Balay a->cstart = oldmat->cstart; 19510ac07820SSatish Balay a->cend = oldmat->cend; 19520ac07820SSatish Balay a->size = oldmat->size; 19530ac07820SSatish Balay a->rank = oldmat->rank; 1954e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 19550ac07820SSatish Balay a->rowvalues = 0; 19560ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 195730793edcSSatish Balay a->barray = 0; 19580ac07820SSatish Balay 19590ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1960f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 19610ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 19620ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 19630ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 19640ac07820SSatish Balay if (oldmat->colmap) { 19650ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 19660ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 19670ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 19680ac07820SSatish Balay } else a->colmap = 0; 19690ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 19700ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 19710ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 19720ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 19730ac07820SSatish Balay } else a->garray = 0; 19740ac07820SSatish Balay 19750ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 19760ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 19770ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 19780ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 19790ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 19800ac07820SSatish Balay PLogObjectParent(mat,a->A); 19810ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 19820ac07820SSatish Balay PLogObjectParent(mat,a->B); 19830ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 19840ac07820SSatish Balay if (flg) { 19850ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 19860ac07820SSatish Balay } 19870ac07820SSatish Balay *newmat = mat; 19883a40ed3dSBarry Smith PetscFunctionReturn(0); 19890ac07820SSatish Balay } 199057b952d6SSatish Balay 199157b952d6SSatish Balay #include "sys.h" 199257b952d6SSatish Balay 19935615d1e5SSatish Balay #undef __FUNC__ 19945615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 199557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 199657b952d6SSatish Balay { 199757b952d6SSatish Balay Mat A; 199857b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 199957b952d6SSatish Balay Scalar *vals,*buf; 200057b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 200157b952d6SSatish Balay MPI_Status status; 2002cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 200357b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 200457b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 200557b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 200657b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 200757b952d6SSatish Balay 2008d64ed03dSBarry Smith PetscFunctionBegin; 200957b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 201057b952d6SSatish Balay bs2 = bs*bs; 201157b952d6SSatish Balay 201257b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 201357b952d6SSatish Balay if (!rank) { 201457b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2015e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2016a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2017d64ed03dSBarry Smith if (header[3] < 0) { 2018a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2019d64ed03dSBarry Smith } 20206c5fab8fSBarry Smith } 2021d64ed03dSBarry Smith 2022ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 202357b952d6SSatish Balay M = header[1]; N = header[2]; 202457b952d6SSatish Balay 2025a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 202657b952d6SSatish Balay 202757b952d6SSatish Balay /* 202857b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 202957b952d6SSatish Balay divisible by the blocksize 203057b952d6SSatish Balay */ 203157b952d6SSatish Balay Mbs = M/bs; 203257b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 203357b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 203457b952d6SSatish Balay else Mbs++; 203557b952d6SSatish Balay if (extra_rows &&!rank) { 2036b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 203757b952d6SSatish Balay } 2038537820f0SBarry Smith 203957b952d6SSatish Balay /* determine ownership of all rows */ 204057b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 204157b952d6SSatish Balay m = mbs * bs; 2042cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2043cee3aa6bSSatish Balay browners = rowners + size + 1; 2044ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 204557b952d6SSatish Balay rowners[0] = 0; 2046cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2047cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 204857b952d6SSatish Balay rstart = rowners[rank]; 204957b952d6SSatish Balay rend = rowners[rank+1]; 205057b952d6SSatish Balay 205157b952d6SSatish Balay /* distribute row lengths to all processors */ 205257b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 205357b952d6SSatish Balay if (!rank) { 205457b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2055e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 205657b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 205757b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2058cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2059ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 206057b952d6SSatish Balay PetscFree(sndcounts); 2061d64ed03dSBarry Smith } else { 2062ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 206357b952d6SSatish Balay } 206457b952d6SSatish Balay 206557b952d6SSatish Balay if (!rank) { 206657b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 206757b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 206857b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 206957b952d6SSatish Balay for ( i=0; i<size; i++ ) { 207057b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 207157b952d6SSatish Balay procsnz[i] += rowlengths[j]; 207257b952d6SSatish Balay } 207357b952d6SSatish Balay } 207457b952d6SSatish Balay PetscFree(rowlengths); 207557b952d6SSatish Balay 207657b952d6SSatish Balay /* determine max buffer needed and allocate it */ 207757b952d6SSatish Balay maxnz = 0; 207857b952d6SSatish Balay for ( i=0; i<size; i++ ) { 207957b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 208057b952d6SSatish Balay } 208157b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 208257b952d6SSatish Balay 208357b952d6SSatish Balay /* read in my part of the matrix column indices */ 208457b952d6SSatish Balay nz = procsnz[0]; 208557b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 208657b952d6SSatish Balay mycols = ibuf; 2087cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2088e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2089cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2090cee3aa6bSSatish Balay 209157b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 209257b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 209357b952d6SSatish Balay nz = procsnz[i]; 2094e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2095ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 209657b952d6SSatish Balay } 209757b952d6SSatish Balay /* read in the stuff for the last proc */ 209857b952d6SSatish Balay if ( size != 1 ) { 209957b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2100e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 210157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2102ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 210357b952d6SSatish Balay } 210457b952d6SSatish Balay PetscFree(cols); 2105d64ed03dSBarry Smith } else { 210657b952d6SSatish Balay /* determine buffer space needed for message */ 210757b952d6SSatish Balay nz = 0; 210857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 210957b952d6SSatish Balay nz += locrowlens[i]; 211057b952d6SSatish Balay } 211157b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 211257b952d6SSatish Balay mycols = ibuf; 211357b952d6SSatish Balay /* receive message of column indices*/ 2114ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2115ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2116a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 211757b952d6SSatish Balay } 211857b952d6SSatish Balay 211957b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2120cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2121cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 212257b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2123cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 212457b952d6SSatish Balay masked1 = mask + Mbs; 212557b952d6SSatish Balay masked2 = masked1 + Mbs; 212657b952d6SSatish Balay rowcount = 0; nzcount = 0; 212757b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 212857b952d6SSatish Balay dcount = 0; 212957b952d6SSatish Balay odcount = 0; 213057b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 213157b952d6SSatish Balay kmax = locrowlens[rowcount]; 213257b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 213357b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 213457b952d6SSatish Balay if (!mask[tmp]) { 213557b952d6SSatish Balay mask[tmp] = 1; 213657b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 213757b952d6SSatish Balay else masked1[dcount++] = tmp; 213857b952d6SSatish Balay } 213957b952d6SSatish Balay } 214057b952d6SSatish Balay rowcount++; 214157b952d6SSatish Balay } 2142cee3aa6bSSatish Balay 214357b952d6SSatish Balay dlens[i] = dcount; 214457b952d6SSatish Balay odlens[i] = odcount; 2145cee3aa6bSSatish Balay 214657b952d6SSatish Balay /* zero out the mask elements we set */ 214757b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 214857b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 214957b952d6SSatish Balay } 2150cee3aa6bSSatish Balay 215157b952d6SSatish Balay /* create our matrix */ 2152537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2153537820f0SBarry Smith CHKERRQ(ierr); 215457b952d6SSatish Balay A = *newmat; 21556d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 215657b952d6SSatish Balay 215757b952d6SSatish Balay if (!rank) { 215857b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 215957b952d6SSatish Balay /* read in my part of the matrix numerical values */ 216057b952d6SSatish Balay nz = procsnz[0]; 216157b952d6SSatish Balay vals = buf; 2162cee3aa6bSSatish Balay mycols = ibuf; 2163cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2164e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2165cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2166537820f0SBarry Smith 216757b952d6SSatish Balay /* insert into matrix */ 216857b952d6SSatish Balay jj = rstart*bs; 216957b952d6SSatish Balay for ( i=0; i<m; i++ ) { 217057b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 217157b952d6SSatish Balay mycols += locrowlens[i]; 217257b952d6SSatish Balay vals += locrowlens[i]; 217357b952d6SSatish Balay jj++; 217457b952d6SSatish Balay } 217557b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 217657b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 217757b952d6SSatish Balay nz = procsnz[i]; 217857b952d6SSatish Balay vals = buf; 2179e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2180ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 218157b952d6SSatish Balay } 218257b952d6SSatish Balay /* the last proc */ 218357b952d6SSatish Balay if ( size != 1 ){ 218457b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2185cee3aa6bSSatish Balay vals = buf; 2186e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 218757b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2188ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 218957b952d6SSatish Balay } 219057b952d6SSatish Balay PetscFree(procsnz); 2191d64ed03dSBarry Smith } else { 219257b952d6SSatish Balay /* receive numeric values */ 219357b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 219457b952d6SSatish Balay 219557b952d6SSatish Balay /* receive message of values*/ 219657b952d6SSatish Balay vals = buf; 2197cee3aa6bSSatish Balay mycols = ibuf; 2198ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2199ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2200a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 220157b952d6SSatish Balay 220257b952d6SSatish Balay /* insert into matrix */ 220357b952d6SSatish Balay jj = rstart*bs; 2204cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 220557b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 220657b952d6SSatish Balay mycols += locrowlens[i]; 220757b952d6SSatish Balay vals += locrowlens[i]; 220857b952d6SSatish Balay jj++; 220957b952d6SSatish Balay } 221057b952d6SSatish Balay } 221157b952d6SSatish Balay PetscFree(locrowlens); 221257b952d6SSatish Balay PetscFree(buf); 221357b952d6SSatish Balay PetscFree(ibuf); 221457b952d6SSatish Balay PetscFree(rowners); 221557b952d6SSatish Balay PetscFree(dlens); 2216cee3aa6bSSatish Balay PetscFree(mask); 22176d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 22186d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 22193a40ed3dSBarry Smith PetscFunctionReturn(0); 222057b952d6SSatish Balay } 222157b952d6SSatish Balay 222257b952d6SSatish Balay 2223