1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*44e6884eSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.94 1998/01/12 17:10:22 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 143b2fbd54SBarry Smith 15537820f0SBarry Smith /* 16537820f0SBarry Smith Local utility routine that creates a mapping from the global column 1757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1957b952d6SSatish Balay length of colmap equals the global matrix length. 2057b952d6SSatish Balay */ 215615d1e5SSatish Balay #undef __FUNC__ 225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2457b952d6SSatish Balay { 2557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 2857b952d6SSatish Balay 29d64ed03dSBarry Smith PetscFunctionBegin; 30758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3157b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3257b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 343a40ed3dSBarry Smith PetscFunctionReturn(0); 3557b952d6SSatish Balay } 3657b952d6SSatish Balay 3780c1aa95SSatish Balay #define CHUNKSIZE 10 3880c1aa95SSatish Balay 39f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 4080c1aa95SSatish Balay { \ 4180c1aa95SSatish Balay \ 4280c1aa95SSatish Balay brow = row/bs; \ 4380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 4580c1aa95SSatish Balay bcol = col/bs; \ 4680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 47ab26458aSBarry Smith low = 0; high = nrow; \ 48ab26458aSBarry Smith while (high-low > 3) { \ 49ab26458aSBarry Smith t = (low+high)/2; \ 50ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 51ab26458aSBarry Smith else low = t; \ 52ab26458aSBarry Smith } \ 53ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 5480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 5580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 5680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 57eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 58eada6651SSatish Balay else *bap = value; \ 59ac7a638eSSatish Balay goto a_noinsert; \ 6080c1aa95SSatish Balay } \ 6180c1aa95SSatish Balay } \ 6289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 63a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 6480c1aa95SSatish Balay if (nrow >= rmax) { \ 6580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 6680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 6780c1aa95SSatish Balay Scalar *new_a; \ 6880c1aa95SSatish Balay \ 69a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7089280ab3SLois Curfman McInnes \ 7180c1aa95SSatish Balay /* malloc new storage space */ \ 7280c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 7380c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 7480c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 7580c1aa95SSatish Balay new_i = new_j + new_nz; \ 7680c1aa95SSatish Balay \ 7780c1aa95SSatish Balay /* copy over old data into new slots */ \ 7880c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 7980c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 8080c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 8180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 8280c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 8380c1aa95SSatish Balay len*sizeof(int)); \ 8480c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 8580c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 8680c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 8780c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 8880c1aa95SSatish Balay /* free up old matrix storage */ \ 8980c1aa95SSatish Balay PetscFree(a->a); \ 9080c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 9180c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 9280c1aa95SSatish Balay a->singlemalloc = 1; \ 9380c1aa95SSatish Balay \ 9480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 9680c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 9780c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 9880c1aa95SSatish Balay a->reallocs++; \ 9980c1aa95SSatish Balay a->nz++; \ 10080c1aa95SSatish Balay } \ 10180c1aa95SSatish Balay N = nrow++ - 1; \ 10280c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 10380c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 10480c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 10580c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 10680c1aa95SSatish Balay } \ 10780c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 10880c1aa95SSatish Balay rp[_i] = bcol; \ 10980c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 110ac7a638eSSatish Balay a_noinsert:; \ 11180c1aa95SSatish Balay ailen[brow] = nrow; \ 11280c1aa95SSatish Balay } 11357b952d6SSatish Balay 114ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115ac7a638eSSatish Balay { \ 116ac7a638eSSatish Balay \ 117ac7a638eSSatish Balay brow = row/bs; \ 118ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 120ac7a638eSSatish Balay bcol = col/bs; \ 121ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 122ac7a638eSSatish Balay low = 0; high = nrow; \ 123ac7a638eSSatish Balay while (high-low > 3) { \ 124ac7a638eSSatish Balay t = (low+high)/2; \ 125ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 126ac7a638eSSatish Balay else low = t; \ 127ac7a638eSSatish Balay } \ 128ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 129ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 130ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 131ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 132ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 133ac7a638eSSatish Balay else *bap = value; \ 134ac7a638eSSatish Balay goto b_noinsert; \ 135ac7a638eSSatish Balay } \ 136ac7a638eSSatish Balay } \ 13789280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 138a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139ac7a638eSSatish Balay if (nrow >= rmax) { \ 140ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14174c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142ac7a638eSSatish Balay Scalar *new_a; \ 143ac7a638eSSatish Balay \ 144a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 14589280ab3SLois Curfman McInnes \ 146ac7a638eSSatish Balay /* malloc new storage space */ \ 14774c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 150ac7a638eSSatish Balay new_i = new_j + new_nz; \ 151ac7a638eSSatish Balay \ 152ac7a638eSSatish Balay /* copy over old data into new slots */ \ 153ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 15474c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158ac7a638eSSatish Balay len*sizeof(int)); \ 159ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163ac7a638eSSatish Balay /* free up old matrix storage */ \ 16474c639caSSatish Balay PetscFree(b->a); \ 16574c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 16774c639caSSatish Balay b->singlemalloc = 1; \ 168ac7a638eSSatish Balay \ 169ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 17174c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 17274c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 17374c639caSSatish Balay b->reallocs++; \ 17474c639caSSatish Balay b->nz++; \ 175ac7a638eSSatish Balay } \ 176ac7a638eSSatish Balay N = nrow++ - 1; \ 177ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 178ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 179ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 180ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183ac7a638eSSatish Balay rp[_i] = bcol; \ 184ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 185ac7a638eSSatish Balay b_noinsert:; \ 186ac7a638eSSatish Balay bilen[brow] = nrow; \ 187ac7a638eSSatish Balay } 188ac7a638eSSatish Balay 1895615d1e5SSatish Balay #undef __FUNC__ 1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 19257b952d6SSatish Balay { 19357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 19457b952d6SSatish Balay Scalar value; 1954fa0d573SSatish Balay int ierr,i,j,row,col; 1964fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1974fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1984fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 19957b952d6SSatish Balay 200eada6651SSatish Balay /* Some Variables required in the macro */ 20180c1aa95SSatish Balay Mat A = baij->A; 20280c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204ac7a638eSSatish Balay Scalar *aa=a->a; 205ac7a638eSSatish Balay 206ac7a638eSSatish Balay Mat B = baij->B; 207ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209ac7a638eSSatish Balay Scalar *ba=b->a; 210ac7a638eSSatish Balay 211ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 213ac7a638eSSatish Balay Scalar *ap,*bap; 21480c1aa95SSatish Balay 215d64ed03dSBarry Smith PetscFunctionBegin; 21657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 218a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220639f9d9dSBarry Smith #endif 22157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 22257b952d6SSatish Balay row = im[i] - rstart_orig; 22357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 22457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 22557b952d6SSatish Balay col = in[j] - cstart_orig; 22657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 22880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 22957b952d6SSatish Balay } 2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 231a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233639f9d9dSBarry Smith #endif 23457b952d6SSatish Balay else { 23557b952d6SSatish Balay if (mat->was_assembled) { 236905e6a2fSBarry Smith if (!baij->colmap) { 237905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238905e6a2fSBarry Smith } 239905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 24057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 24157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 24257b952d6SSatish Balay col = in[j]; 2439bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2449bf004c3SSatish Balay B = baij->B; 2459bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2469bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2479bf004c3SSatish Balay ba=b->a; 24857b952d6SSatish Balay } 249d64ed03dSBarry Smith } else col = in[j]; 25057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25357b952d6SSatish Balay } 25457b952d6SSatish Balay } 255d64ed03dSBarry Smith } else { 25690f02eecSBarry Smith if (roworiented && !baij->donotstash) { 25757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258d64ed03dSBarry Smith } else { 25990f02eecSBarry Smith if (!baij->donotstash) { 26057b952d6SSatish Balay row = im[i]; 26157b952d6SSatish Balay for ( j=0; j<n; j++ ) { 26257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 26357b952d6SSatish Balay } 26457b952d6SSatish Balay } 26557b952d6SSatish Balay } 26657b952d6SSatish Balay } 26790f02eecSBarry Smith } 2683a40ed3dSBarry Smith PetscFunctionReturn(0); 26957b952d6SSatish Balay } 27057b952d6SSatish Balay 271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272ab26458aSBarry Smith #undef __FUNC__ 273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275ab26458aSBarry Smith { 276ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27730793edcSSatish Balay Scalar *value,*barray=baij->barray; 278abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 279ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 280ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 281ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282ab26458aSBarry Smith 28330793edcSSatish Balay if(!barray) { 28447513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 28530793edcSSatish Balay } 28630793edcSSatish Balay 287ab26458aSBarry Smith if (roworiented) { 288ab26458aSBarry Smith stepval = (n-1)*bs; 289ab26458aSBarry Smith } else { 290ab26458aSBarry Smith stepval = (m-1)*bs; 291ab26458aSBarry Smith } 292ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 294a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295a8c6a408SBarry Smith if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296ab26458aSBarry Smith #endif 297ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 298ab26458aSBarry Smith row = im[i] - rstart; 299ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 30015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 30115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 30215b57d14SSatish Balay barray = v + i*bs2; 30315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 30415b57d14SSatish Balay barray = v + j*bs2; 30515b57d14SSatish Balay } else { /* Here a copy is required */ 306ab26458aSBarry Smith if (roworiented) { 307ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 308ab26458aSBarry Smith } else { 309ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 310abef11f7SSatish Balay } 31147513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 31247513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 31330793edcSSatish Balay *barray++ = *value++; 31447513183SBarry Smith } 31547513183SBarry Smith } 31630793edcSSatish Balay barray -=bs2; 31715b57d14SSatish Balay } 318abef11f7SSatish Balay 319abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 320abef11f7SSatish Balay col = in[j] - cstart; 32130793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322ab26458aSBarry Smith } 3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 324a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325a8c6a408SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326ab26458aSBarry Smith #endif 327ab26458aSBarry Smith else { 328ab26458aSBarry Smith if (mat->was_assembled) { 329ab26458aSBarry Smith if (!baij->colmap) { 330ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331ab26458aSBarry Smith } 332a5eb4965SSatish Balay 3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 334a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335a5eb4965SSatish Balay #endif 336a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 337ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339ab26458aSBarry Smith col = in[j]; 340ab26458aSBarry Smith } 341ab26458aSBarry Smith } 342ab26458aSBarry Smith else col = in[j]; 34330793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344ab26458aSBarry Smith } 345ab26458aSBarry Smith } 346d64ed03dSBarry Smith } else { 347ab26458aSBarry Smith if (!baij->donotstash) { 348ab26458aSBarry Smith if (roworiented ) { 349abef11f7SSatish Balay row = im[i]*bs; 350abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 351abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 352abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 353abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355abef11f7SSatish Balay } 356ab26458aSBarry Smith } 357ab26458aSBarry Smith } 358d64ed03dSBarry Smith } else { 359ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 360abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 361abef11f7SSatish Balay col = in[j]*bs; 362abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 363abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365ab26458aSBarry Smith } 366ab26458aSBarry Smith } 367ab26458aSBarry Smith } 368abef11f7SSatish Balay } 369abef11f7SSatish Balay } 370ab26458aSBarry Smith } 371ab26458aSBarry Smith } 3723a40ed3dSBarry Smith PetscFunctionReturn(0); 373ab26458aSBarry Smith } 3740bdbc534SSatish Balay #include <math.h> 3750bdbc534SSatish Balay #define HASH_KEY 0.6180339887 3760bdbc534SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 377ab26458aSBarry Smith 3785615d1e5SSatish Balay #undef __FUNC__ 3790bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 3800bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 3810bdbc534SSatish Balay { 3820bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3830bdbc534SSatish Balay int ierr,i,j,row,col; 3840bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 3850bdbc534SSatish Balay int rend_orig=baij->rend_bs; 3860bdbc534SSatish Balay 3870bdbc534SSatish Balay int h1,key,size=baij->ht_size,k,bs=baij->bs; 3880bdbc534SSatish Balay double * HT = baij->ht; 3890bdbc534SSatish Balay Scalar ** HD = (Scalar **)(HT + size),value; 3900bdbc534SSatish Balay 3910bdbc534SSatish Balay 3920bdbc534SSatish Balay PetscFunctionBegin; 3930bdbc534SSatish Balay 3940bdbc534SSatish Balay if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 3950bdbc534SSatish Balay 3960bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 3970bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 3980bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 3990bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4000bdbc534SSatish Balay #endif 4010bdbc534SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 4020bdbc534SSatish Balay row = im[i]; 4030bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4040bdbc534SSatish Balay col = in[j]; 4050bdbc534SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4060bdbc534SSatish Balay /* Look up into the Hash Table */ 4070bdbc534SSatish Balay key = (row/bs)*baij->Nbs+(col/bs)+1; 4080bdbc534SSatish Balay h1 = HASH1(size,key); 4090bdbc534SSatish Balay 4100bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 4110bdbc534SSatish Balay if (HT[(h1+k)%size] == key) { 4120bdbc534SSatish Balay if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value; 4130bdbc534SSatish Balay else *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) = value; 4140bdbc534SSatish Balay break; 4150bdbc534SSatish Balay } 4160bdbc534SSatish Balay } 4170bdbc534SSatish Balay if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found"); 4180bdbc534SSatish Balay } 4190bdbc534SSatish Balay } else { 4200bdbc534SSatish Balay if (roworiented && !baij->donotstash) { 4210bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 4220bdbc534SSatish Balay } else { 4230bdbc534SSatish Balay if (!baij->donotstash) { 4240bdbc534SSatish Balay row = im[i]; 4250bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4260bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 4270bdbc534SSatish Balay } 4280bdbc534SSatish Balay } 4290bdbc534SSatish Balay } 4300bdbc534SSatish Balay } 4310bdbc534SSatish Balay } 4320bdbc534SSatish Balay PetscFunctionReturn(0); 4330bdbc534SSatish Balay } 4340bdbc534SSatish Balay 4350bdbc534SSatish Balay #undef __FUNC__ 4360bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 4370bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4380bdbc534SSatish Balay { 4390bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4400bdbc534SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 4410bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 442b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 4430bdbc534SSatish Balay 4440bdbc534SSatish Balay int h1,key,size=baij->ht_size; 4450bdbc534SSatish Balay double * HT = baij->ht; 4460bdbc534SSatish Balay Scalar ** HD = (Scalar **)(HT + size),*value,*baij_a; 4470bdbc534SSatish Balay 4480bdbc534SSatish Balay 4490bdbc534SSatish Balay if (roworiented) { 4500bdbc534SSatish Balay stepval = (n-1)*bs; 4510bdbc534SSatish Balay } else { 4520bdbc534SSatish Balay stepval = (m-1)*bs; 4530bdbc534SSatish Balay } 4540bdbc534SSatish Balay for ( i=0; i<m; i++ ) { 4550bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g) 4560bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4570bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4580bdbc534SSatish Balay #endif 4590bdbc534SSatish Balay if (im[i] >= rstart && im[i] < rend) { 4600bdbc534SSatish Balay row = im[i]; 4610bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 4620bdbc534SSatish Balay col = in[j]; 4630bdbc534SSatish Balay 4640bdbc534SSatish Balay /* Look up into the Hash Table */ 4650bdbc534SSatish Balay key = row*baij->Nbs+col+1; 4660bdbc534SSatish Balay h1 = HASH1(size,key); 4670bdbc534SSatish Balay 4680bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 4690bdbc534SSatish Balay if (HT[(h1+k)%size] == key) { 4700bdbc534SSatish Balay baij_a = HD[(h1+k)%size]; 4710bdbc534SSatish Balay 4720bdbc534SSatish Balay if (roworiented) { 4730bdbc534SSatish Balay value = v + i*(stepval+bs)*bs + j*bs; 474b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 475b4cc0f5aSSatish Balay for ( jj=0; jj<bs2; jj+=bs ) { 476b4cc0f5aSSatish Balay if (addv == ADD_VALUES) baij_a[jj] += *value++; 477b4cc0f5aSSatish Balay else baij_a[jj] = *value++; 478b4cc0f5aSSatish Balay } 479b4cc0f5aSSatish Balay } 480b4cc0f5aSSatish Balay 4810bdbc534SSatish Balay } else { 4820bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 483b4cc0f5aSSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 4840bdbc534SSatish Balay for ( jj=0; jj<bs; jj++ ) { 485b4cc0f5aSSatish Balay if (addv == ADD_VALUES) baij_a[jj] += *value++; 486b4cc0f5aSSatish Balay else baij_a[jj] = *value++; 487b4cc0f5aSSatish Balay } 4880bdbc534SSatish Balay } 4890bdbc534SSatish Balay } 4900bdbc534SSatish Balay break; 4910bdbc534SSatish Balay } 4920bdbc534SSatish Balay } 4930bdbc534SSatish Balay if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found"); 4940bdbc534SSatish Balay } 4950bdbc534SSatish Balay } else { 4960bdbc534SSatish Balay if (!baij->donotstash) { 4970bdbc534SSatish Balay if (roworiented ) { 4980bdbc534SSatish Balay row = im[i]*bs; 4990bdbc534SSatish Balay value = v + i*(stepval+bs)*bs; 5000bdbc534SSatish Balay for ( j=0; j<bs; j++,row++ ) { 5010bdbc534SSatish Balay for ( k=0; k<n; k++ ) { 5020bdbc534SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 5030bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5040bdbc534SSatish Balay } 5050bdbc534SSatish Balay } 5060bdbc534SSatish Balay } 5070bdbc534SSatish Balay } else { 5080bdbc534SSatish Balay for ( j=0; j<n; j++ ) { 5090bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 5100bdbc534SSatish Balay col = in[j]*bs; 5110bdbc534SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 5120bdbc534SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 5130bdbc534SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 5140bdbc534SSatish Balay } 5150bdbc534SSatish Balay } 5160bdbc534SSatish Balay } 5170bdbc534SSatish Balay } 5180bdbc534SSatish Balay } 5190bdbc534SSatish Balay } 5200bdbc534SSatish Balay } 5210bdbc534SSatish Balay PetscFunctionReturn(0); 5220bdbc534SSatish Balay } 5230bdbc534SSatish Balay #undef __FUNC__ 5245615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 525ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 526d6de1c52SSatish Balay { 527d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 528d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 529d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 530d6de1c52SSatish Balay 531d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 532a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 533a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 534d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 535d6de1c52SSatish Balay row = idxm[i] - bsrstart; 536d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 537a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 538a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 539d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 540d6de1c52SSatish Balay col = idxn[j] - bscstart; 541d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 542d64ed03dSBarry Smith } else { 543905e6a2fSBarry Smith if (!baij->colmap) { 544905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 545905e6a2fSBarry Smith } 546e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 547dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 548d9d09a02SSatish Balay else { 549dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 550d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 551d6de1c52SSatish Balay } 552d6de1c52SSatish Balay } 553d6de1c52SSatish Balay } 554d64ed03dSBarry Smith } else { 555a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 556d6de1c52SSatish Balay } 557d6de1c52SSatish Balay } 5583a40ed3dSBarry Smith PetscFunctionReturn(0); 559d6de1c52SSatish Balay } 560d6de1c52SSatish Balay 5615615d1e5SSatish Balay #undef __FUNC__ 5625615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 563ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 564d6de1c52SSatish Balay { 565d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 566d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 567acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 568d6de1c52SSatish Balay double sum = 0.0; 569d6de1c52SSatish Balay Scalar *v; 570d6de1c52SSatish Balay 571d64ed03dSBarry Smith PetscFunctionBegin; 572d6de1c52SSatish Balay if (baij->size == 1) { 573d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 574d6de1c52SSatish Balay } else { 575d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 576d6de1c52SSatish Balay v = amat->a; 577d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 5783a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 579d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 580d6de1c52SSatish Balay #else 581d6de1c52SSatish Balay sum += (*v)*(*v); v++; 582d6de1c52SSatish Balay #endif 583d6de1c52SSatish Balay } 584d6de1c52SSatish Balay v = bmat->a; 585d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 5863a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 587d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 588d6de1c52SSatish Balay #else 589d6de1c52SSatish Balay sum += (*v)*(*v); v++; 590d6de1c52SSatish Balay #endif 591d6de1c52SSatish Balay } 592ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 593d6de1c52SSatish Balay *norm = sqrt(*norm); 594d64ed03dSBarry Smith } else { 595e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 596d6de1c52SSatish Balay } 597d64ed03dSBarry Smith } 5983a40ed3dSBarry Smith PetscFunctionReturn(0); 599d6de1c52SSatish Balay } 60057b952d6SSatish Balay 6015615d1e5SSatish Balay #undef __FUNC__ 6025615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 603ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 60457b952d6SSatish Balay { 60557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60657b952d6SSatish Balay MPI_Comm comm = mat->comm; 60757b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 60857b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 60957b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 61057b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 61157b952d6SSatish Balay InsertMode addv; 61257b952d6SSatish Balay Scalar *rvalues,*svalues; 61357b952d6SSatish Balay 614d64ed03dSBarry Smith PetscFunctionBegin; 61557b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 616ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 61757b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 618a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 61957b952d6SSatish Balay } 620e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 62157b952d6SSatish Balay 62257b952d6SSatish Balay /* first count number of contributors to each processor */ 62357b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 62457b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 62557b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 62657b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 62757b952d6SSatish Balay idx = baij->stash.idx[i]; 62857b952d6SSatish Balay for ( j=0; j<size; j++ ) { 62957b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 63057b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 63157b952d6SSatish Balay } 63257b952d6SSatish Balay } 63357b952d6SSatish Balay } 63457b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 63557b952d6SSatish Balay 63657b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 63757b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 638ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 63957b952d6SSatish Balay nreceives = work[rank]; 640ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 64157b952d6SSatish Balay nmax = work[rank]; 64257b952d6SSatish Balay PetscFree(work); 64357b952d6SSatish Balay 64457b952d6SSatish Balay /* post receives: 64557b952d6SSatish Balay 1) each message will consist of ordered pairs 64657b952d6SSatish Balay (global index,value) we store the global index as a double 64757b952d6SSatish Balay to simplify the message passing. 64857b952d6SSatish Balay 2) since we don't know how long each individual message is we 64957b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 65057b952d6SSatish Balay this is a lot of wasted space. 65157b952d6SSatish Balay 65257b952d6SSatish Balay 65357b952d6SSatish Balay This could be done better. 65457b952d6SSatish Balay */ 655f8abc2e8SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 656f8abc2e8SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 65757b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 658ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 65957b952d6SSatish Balay } 66057b952d6SSatish Balay 66157b952d6SSatish Balay /* do sends: 66257b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 66357b952d6SSatish Balay the ith processor 66457b952d6SSatish Balay */ 66557b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 666d64ed03dSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 66757b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 66857b952d6SSatish Balay starts[0] = 0; 66957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 67057b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 67157b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 67257b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 67357b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 67457b952d6SSatish Balay } 67557b952d6SSatish Balay PetscFree(owner); 67657b952d6SSatish Balay starts[0] = 0; 67757b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 67857b952d6SSatish Balay count = 0; 67957b952d6SSatish Balay for ( i=0; i<size; i++ ) { 68057b952d6SSatish Balay if (procs[i]) { 681ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 68257b952d6SSatish Balay } 68357b952d6SSatish Balay } 68457b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 68557b952d6SSatish Balay 68657b952d6SSatish Balay /* Free cache space */ 687cd1fa5fbSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 68857b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 68957b952d6SSatish Balay 69057b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 69157b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 69257b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 69357b952d6SSatish Balay baij->rmax = nmax; 69457b952d6SSatish Balay 6953a40ed3dSBarry Smith PetscFunctionReturn(0); 69657b952d6SSatish Balay } 697bd7f49f5SSatish Balay 698bd7f49f5SSatish Balay int CreateHashTable(Mat mat,double factor) 699596b8d2eSBarry Smith { 700596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 701596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 702596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 7030bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7040bdbc534SSatish Balay int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 7050bdbc534SSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col; 706a07cd24cSSatish Balay double *HT,key; 707a07cd24cSSatish Balay extern int PetscGlobalRank; 7080bdbc534SSatish Balay Scalar **HD; 709a07cd24cSSatish Balay /* 710a07cd24cSSatish Balay Scalar *aa=a->a,*ba=b->a; 711596b8d2eSBarry Smith static double *HT; 712596b8d2eSBarry Smith static int flag=1; 713a07cd24cSSatish Balay */ 714d64ed03dSBarry Smith PetscFunctionBegin; 7150bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 7160bdbc534SSatish Balay size = baij->ht_size; 717596b8d2eSBarry Smith /* Allocate Memory for Hash Table */ 7180bdbc534SSatish Balay if (baij->ht) { 7190bdbc534SSatish Balay PetscFunctionReturn(0); 720596b8d2eSBarry Smith } 7210bdbc534SSatish Balay 7220bdbc534SSatish Balay baij->ht = (double*)PetscMalloc(size*(sizeof(double)+sizeof(Scalar*))+1); CHKPTRQ(baij->ht); 723a07cd24cSSatish Balay HT = baij->ht; 7240bdbc534SSatish Balay HD = (Scalar**)(HT + size); 7250bdbc534SSatish Balay PetscMemzero(HT,size*sizeof(double)+sizeof(Scalar*)); 7260bdbc534SSatish Balay 727596b8d2eSBarry Smith 728596b8d2eSBarry Smith /* Loop Over A */ 7290bdbc534SSatish Balay for ( i=0; i<a->mbs; i++ ) { 730596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 7310bdbc534SSatish Balay row = i+rstart; 7320bdbc534SSatish Balay col = aj[j]+cstart; 733596b8d2eSBarry Smith 7340bdbc534SSatish Balay key = row*baij->Nbs + col + 1; 7350bdbc534SSatish Balay /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 7360bdbc534SSatish Balay h1 = HASH1(size,key); 7370bdbc534SSatish Balay 7380bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7390bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7400bdbc534SSatish Balay HT[(h1+k)%size] = key; 7410bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 742596b8d2eSBarry Smith break; 743596b8d2eSBarry Smith } else ct++; 744596b8d2eSBarry Smith } 745bd7f49f5SSatish Balay if (k> max1) max1 = k; 746596b8d2eSBarry Smith } 747596b8d2eSBarry Smith } 748596b8d2eSBarry Smith /* Loop Over B */ 7490bdbc534SSatish Balay for ( i=0; i<b->mbs; i++ ) { 750596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 7510bdbc534SSatish Balay row = i+rstart; 7520bdbc534SSatish Balay col = garray[bj[j]]; 7530bdbc534SSatish Balay key = row*baij->Nbs + col + 1; 7540bdbc534SSatish Balay /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 755596b8d2eSBarry Smith h1 = HASH1(size,key); 7560bdbc534SSatish Balay for ( k=0; k<size; k++ ){ 7570bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7580bdbc534SSatish Balay HT[(h1+k)%size] = key; 7590bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 760596b8d2eSBarry Smith break; 761596b8d2eSBarry Smith } else ct++; 762596b8d2eSBarry Smith } 763bd7f49f5SSatish Balay if (k> max2) max2 = k; 764596b8d2eSBarry Smith } 765596b8d2eSBarry Smith } 766596b8d2eSBarry Smith 767596b8d2eSBarry Smith /* Print Summary */ 768596b8d2eSBarry Smith for ( i=0,key=0.0,j=0; i<size; i++) 769596b8d2eSBarry Smith if (HT[i]) {j++;} 770596b8d2eSBarry Smith 7710bdbc534SSatish Balay /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 7720bdbc534SSatish Balay PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 7730bdbc534SSatish Balay fflush(stdout); 7743a40ed3dSBarry Smith PetscFunctionReturn(0); 775596b8d2eSBarry Smith } 77657b952d6SSatish Balay 7775615d1e5SSatish Balay #undef __FUNC__ 7785615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 779ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 78057b952d6SSatish Balay { 78157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 78257b952d6SSatish Balay MPI_Status *send_status,recv_status; 78357b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 784596b8d2eSBarry Smith int bs=baij->bs,row,col,other_disassembled,flg; 78557b952d6SSatish Balay Scalar *values,val; 786e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 78757b952d6SSatish Balay 788d64ed03dSBarry Smith PetscFunctionBegin; 78957b952d6SSatish Balay /* wait on receives */ 79057b952d6SSatish Balay while (count) { 791ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 79257b952d6SSatish Balay /* unpack receives into our local space */ 79357b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 794ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 79557b952d6SSatish Balay n = n/3; 79657b952d6SSatish Balay for ( i=0; i<n; i++ ) { 79757b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 79857b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 79957b952d6SSatish Balay val = values[3*i+2]; 80057b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 80157b952d6SSatish Balay col -= baij->cstart*bs; 8026fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 803d64ed03dSBarry Smith } else { 80457b952d6SSatish Balay if (mat->was_assembled) { 805905e6a2fSBarry Smith if (!baij->colmap) { 806905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 807905e6a2fSBarry Smith } 808a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 80957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 81057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 81157b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 81257b952d6SSatish Balay } 81357b952d6SSatish Balay } 8146fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 81557b952d6SSatish Balay } 81657b952d6SSatish Balay } 81757b952d6SSatish Balay count--; 81857b952d6SSatish Balay } 81957b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 82057b952d6SSatish Balay 82157b952d6SSatish Balay /* wait on sends */ 82257b952d6SSatish Balay if (baij->nsends) { 823d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 824ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 82557b952d6SSatish Balay PetscFree(send_status); 82657b952d6SSatish Balay } 82757b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 82857b952d6SSatish Balay 82957b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 83057b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 83157b952d6SSatish Balay 83257b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 83357b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 834ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 83557b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 83657b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 83757b952d6SSatish Balay } 83857b952d6SSatish Balay 8396d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 84057b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 84157b952d6SSatish Balay } 84257b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 84357b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 84457b952d6SSatish Balay 845596b8d2eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 846*44e6884eSSatish Balay if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) { 847a07cd24cSSatish Balay double fact = 1.39; 8480bdbc534SSatish Balay CreateHashTable(mat,fact); 8490bdbc534SSatish Balay mat->ops.setvalues = MatSetValues_MPIBAIJ_HT; 8500bdbc534SSatish Balay mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 851bd7f49f5SSatish Balay } 85257b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 8533a40ed3dSBarry Smith PetscFunctionReturn(0); 85457b952d6SSatish Balay } 85557b952d6SSatish Balay 8565615d1e5SSatish Balay #undef __FUNC__ 8575615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 85857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 85957b952d6SSatish Balay { 86057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 86157b952d6SSatish Balay int ierr; 86257b952d6SSatish Balay 863d64ed03dSBarry Smith PetscFunctionBegin; 86457b952d6SSatish Balay if (baij->size == 1) { 86557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 866a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 8673a40ed3dSBarry Smith PetscFunctionReturn(0); 86857b952d6SSatish Balay } 86957b952d6SSatish Balay 8705615d1e5SSatish Balay #undef __FUNC__ 8715615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 87257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 87357b952d6SSatish Balay { 87457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 875cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 87657b952d6SSatish Balay FILE *fd; 87757b952d6SSatish Balay ViewerType vtype; 87857b952d6SSatish Balay 879d64ed03dSBarry Smith PetscFunctionBegin; 88057b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 88157b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 88257b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 883639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 8844e220ebcSLois Curfman McInnes MatInfo info; 88557b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 88657b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 8874e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 88857b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 88957b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 8904e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 8914e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 8924e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 8934e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 8944e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 8954e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 89657b952d6SSatish Balay fflush(fd); 89757b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 89857b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 8993a40ed3dSBarry Smith PetscFunctionReturn(0); 900d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 901bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 9023a40ed3dSBarry Smith PetscFunctionReturn(0); 90357b952d6SSatish Balay } 90457b952d6SSatish Balay } 90557b952d6SSatish Balay 90657b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 90757b952d6SSatish Balay Draw draw; 90857b952d6SSatish Balay PetscTruth isnull; 90957b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 9103a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 91157b952d6SSatish Balay } 91257b952d6SSatish Balay 91357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 91457b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 91557b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 91657b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 91757b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 91857b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 91957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 92057b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 92157b952d6SSatish Balay fflush(fd); 92257b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 923d64ed03dSBarry Smith } else { 92457b952d6SSatish Balay int size = baij->size; 92557b952d6SSatish Balay rank = baij->rank; 92657b952d6SSatish Balay if (size == 1) { 92757b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 928d64ed03dSBarry Smith } else { 92957b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 93057b952d6SSatish Balay Mat A; 93157b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 93257b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 93357b952d6SSatish Balay int mbs=baij->mbs; 93457b952d6SSatish Balay Scalar *a; 93557b952d6SSatish Balay 93657b952d6SSatish Balay if (!rank) { 93755843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 938d64ed03dSBarry Smith } else { 93955843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 94057b952d6SSatish Balay } 94157b952d6SSatish Balay PLogObjectParent(mat,A); 94257b952d6SSatish Balay 94357b952d6SSatish Balay /* copy over the A part */ 94457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 94557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 94657b952d6SSatish Balay row = baij->rstart; 94757b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 94857b952d6SSatish Balay 94957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 95057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 95157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 95257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 95357b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 95457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 955cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 956cee3aa6bSSatish Balay col++; a += bs; 95757b952d6SSatish Balay } 95857b952d6SSatish Balay } 95957b952d6SSatish Balay } 96057b952d6SSatish Balay /* copy over the B part */ 96157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 96257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 96357b952d6SSatish Balay row = baij->rstart*bs; 96457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 96557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 96657b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 96757b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 96857b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 96957b952d6SSatish Balay for (k=0; k<bs; k++ ) { 970cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 971cee3aa6bSSatish Balay col++; a += bs; 97257b952d6SSatish Balay } 97357b952d6SSatish Balay } 97457b952d6SSatish Balay } 97557b952d6SSatish Balay PetscFree(rvals); 9766d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 9776d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 97855843e3eSBarry Smith /* 97955843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 98055843e3eSBarry Smith synchronized across all processors that share the Draw object 98155843e3eSBarry Smith */ 98255843e3eSBarry Smith if (!rank || vtype == DRAW_VIEWER) { 98357b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 98457b952d6SSatish Balay } 98557b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 98657b952d6SSatish Balay } 98757b952d6SSatish Balay } 9883a40ed3dSBarry Smith PetscFunctionReturn(0); 98957b952d6SSatish Balay } 99057b952d6SSatish Balay 99157b952d6SSatish Balay 99257b952d6SSatish Balay 9935615d1e5SSatish Balay #undef __FUNC__ 9945615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 995ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 99657b952d6SSatish Balay { 99757b952d6SSatish Balay Mat mat = (Mat) obj; 99857b952d6SSatish Balay int ierr; 99957b952d6SSatish Balay ViewerType vtype; 100057b952d6SSatish Balay 1001d64ed03dSBarry Smith PetscFunctionBegin; 100257b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 100357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 100457b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 100557b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 10063a40ed3dSBarry Smith } else if (vtype == BINARY_FILE_VIEWER) { 10073a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 100857b952d6SSatish Balay } 10093a40ed3dSBarry Smith PetscFunctionReturn(0); 101057b952d6SSatish Balay } 101157b952d6SSatish Balay 10125615d1e5SSatish Balay #undef __FUNC__ 10135615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 1014ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 101579bdfe76SSatish Balay { 101679bdfe76SSatish Balay Mat mat = (Mat) obj; 101779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 101879bdfe76SSatish Balay int ierr; 101979bdfe76SSatish Balay 1020d64ed03dSBarry Smith PetscFunctionBegin; 10213a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 102279bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 102379bdfe76SSatish Balay #endif 102479bdfe76SSatish Balay 102583e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 102679bdfe76SSatish Balay PetscFree(baij->rowners); 102779bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 102879bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 102979bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 103079bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 103179bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 103279bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 103379bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 103430793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 10350bdbc534SSatish Balay if (baij->ht) PetscFree(baij->ht); 103679bdfe76SSatish Balay PetscFree(baij); 103779bdfe76SSatish Balay PLogObjectDestroy(mat); 103879bdfe76SSatish Balay PetscHeaderDestroy(mat); 10393a40ed3dSBarry Smith PetscFunctionReturn(0); 104079bdfe76SSatish Balay } 104179bdfe76SSatish Balay 10425615d1e5SSatish Balay #undef __FUNC__ 10435615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 1044ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1045cee3aa6bSSatish Balay { 1046cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 104747b4a8eaSLois Curfman McInnes int ierr, nt; 1048cee3aa6bSSatish Balay 1049d64ed03dSBarry Smith PetscFunctionBegin; 1050c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 105147b4a8eaSLois Curfman McInnes if (nt != a->n) { 1052a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 105347b4a8eaSLois Curfman McInnes } 1054c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 105547b4a8eaSLois Curfman McInnes if (nt != a->m) { 1056a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 105747b4a8eaSLois Curfman McInnes } 105843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1059cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 106043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1061cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 106243a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 10633a40ed3dSBarry Smith PetscFunctionReturn(0); 1064cee3aa6bSSatish Balay } 1065cee3aa6bSSatish Balay 10665615d1e5SSatish Balay #undef __FUNC__ 10675615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 1068ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1069cee3aa6bSSatish Balay { 1070cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1071cee3aa6bSSatish Balay int ierr; 1072d64ed03dSBarry Smith 1073d64ed03dSBarry Smith PetscFunctionBegin; 107443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1075cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 107643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1077cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 10783a40ed3dSBarry Smith PetscFunctionReturn(0); 1079cee3aa6bSSatish Balay } 1080cee3aa6bSSatish Balay 10815615d1e5SSatish Balay #undef __FUNC__ 10825615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 1083ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1084cee3aa6bSSatish Balay { 1085cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1086cee3aa6bSSatish Balay int ierr; 1087cee3aa6bSSatish Balay 1088d64ed03dSBarry Smith PetscFunctionBegin; 1089cee3aa6bSSatish Balay /* do nondiagonal part */ 1090cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1091cee3aa6bSSatish Balay /* send it on its way */ 1092537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1093cee3aa6bSSatish Balay /* do local part */ 1094cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1095cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1096cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1097cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1098639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 10993a40ed3dSBarry Smith PetscFunctionReturn(0); 1100cee3aa6bSSatish Balay } 1101cee3aa6bSSatish Balay 11025615d1e5SSatish Balay #undef __FUNC__ 11035615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1104ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1105cee3aa6bSSatish Balay { 1106cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1107cee3aa6bSSatish Balay int ierr; 1108cee3aa6bSSatish Balay 1109d64ed03dSBarry Smith PetscFunctionBegin; 1110cee3aa6bSSatish Balay /* do nondiagonal part */ 1111cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1112cee3aa6bSSatish Balay /* send it on its way */ 1113537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1114cee3aa6bSSatish Balay /* do local part */ 1115cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1116cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1117cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1118cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1119537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 11203a40ed3dSBarry Smith PetscFunctionReturn(0); 1121cee3aa6bSSatish Balay } 1122cee3aa6bSSatish Balay 1123cee3aa6bSSatish Balay /* 1124cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1125cee3aa6bSSatish Balay diagonal block 1126cee3aa6bSSatish Balay */ 11275615d1e5SSatish Balay #undef __FUNC__ 11285615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1129ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1130cee3aa6bSSatish Balay { 1131cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 11323a40ed3dSBarry Smith int ierr; 1133d64ed03dSBarry Smith 1134d64ed03dSBarry Smith PetscFunctionBegin; 1135a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 11363a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 11373a40ed3dSBarry Smith PetscFunctionReturn(0); 1138cee3aa6bSSatish Balay } 1139cee3aa6bSSatish Balay 11405615d1e5SSatish Balay #undef __FUNC__ 11415615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 1142ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1143cee3aa6bSSatish Balay { 1144cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1145cee3aa6bSSatish Balay int ierr; 1146d64ed03dSBarry Smith 1147d64ed03dSBarry Smith PetscFunctionBegin; 1148cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1149cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 11503a40ed3dSBarry Smith PetscFunctionReturn(0); 1151cee3aa6bSSatish Balay } 1152026e39d0SSatish Balay 11535615d1e5SSatish Balay #undef __FUNC__ 11545615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 1155ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 115657b952d6SSatish Balay { 115757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1158d64ed03dSBarry Smith 1159d64ed03dSBarry Smith PetscFunctionBegin; 1160bd7f49f5SSatish Balay if (m) *m = mat->M; 1161bd7f49f5SSatish Balay if (n) *n = mat->N; 11623a40ed3dSBarry Smith PetscFunctionReturn(0); 116357b952d6SSatish Balay } 116457b952d6SSatish Balay 11655615d1e5SSatish Balay #undef __FUNC__ 11665615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1167ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 116857b952d6SSatish Balay { 116957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1170d64ed03dSBarry Smith 1171d64ed03dSBarry Smith PetscFunctionBegin; 117257b952d6SSatish Balay *m = mat->m; *n = mat->N; 11733a40ed3dSBarry Smith PetscFunctionReturn(0); 117457b952d6SSatish Balay } 117557b952d6SSatish Balay 11765615d1e5SSatish Balay #undef __FUNC__ 11775615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1178ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 117957b952d6SSatish Balay { 118057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1181d64ed03dSBarry Smith 1182d64ed03dSBarry Smith PetscFunctionBegin; 118357b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 11843a40ed3dSBarry Smith PetscFunctionReturn(0); 118557b952d6SSatish Balay } 118657b952d6SSatish Balay 1187acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1188acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1189acdf5bf4SSatish Balay 11905615d1e5SSatish Balay #undef __FUNC__ 11915615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1192acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1193acdf5bf4SSatish Balay { 1194acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1195acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1196acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1197d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1198d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1199acdf5bf4SSatish Balay 1200d64ed03dSBarry Smith PetscFunctionBegin; 1201a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1202acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1203acdf5bf4SSatish Balay 1204acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1205acdf5bf4SSatish Balay /* 1206acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1207acdf5bf4SSatish Balay */ 1208acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1209bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1210bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1211acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1212acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1213acdf5bf4SSatish Balay } 1214acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1215acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1216acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1217acdf5bf4SSatish Balay } 1218acdf5bf4SSatish Balay 1219a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1220d9d09a02SSatish Balay lrow = row - brstart; 1221acdf5bf4SSatish Balay 1222acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1223acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1224acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1225acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1226acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1227acdf5bf4SSatish Balay nztot = nzA + nzB; 1228acdf5bf4SSatish Balay 1229acdf5bf4SSatish Balay cmap = mat->garray; 1230acdf5bf4SSatish Balay if (v || idx) { 1231acdf5bf4SSatish Balay if (nztot) { 1232acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1233acdf5bf4SSatish Balay int imark = -1; 1234acdf5bf4SSatish Balay if (v) { 1235acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1236acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1237d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1238acdf5bf4SSatish Balay else break; 1239acdf5bf4SSatish Balay } 1240acdf5bf4SSatish Balay imark = i; 1241acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1242acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1243acdf5bf4SSatish Balay } 1244acdf5bf4SSatish Balay if (idx) { 1245acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1246acdf5bf4SSatish Balay if (imark > -1) { 1247acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1248bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1249acdf5bf4SSatish Balay } 1250acdf5bf4SSatish Balay } else { 1251acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1252d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1253d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1254acdf5bf4SSatish Balay else break; 1255acdf5bf4SSatish Balay } 1256acdf5bf4SSatish Balay imark = i; 1257acdf5bf4SSatish Balay } 1258d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1259d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1260acdf5bf4SSatish Balay } 1261d64ed03dSBarry Smith } else { 1262d212a18eSSatish Balay if (idx) *idx = 0; 1263d212a18eSSatish Balay if (v) *v = 0; 1264d212a18eSSatish Balay } 1265acdf5bf4SSatish Balay } 1266acdf5bf4SSatish Balay *nz = nztot; 1267acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1268acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 12693a40ed3dSBarry Smith PetscFunctionReturn(0); 1270acdf5bf4SSatish Balay } 1271acdf5bf4SSatish Balay 12725615d1e5SSatish Balay #undef __FUNC__ 12735615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1274acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1275acdf5bf4SSatish Balay { 1276acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1277d64ed03dSBarry Smith 1278d64ed03dSBarry Smith PetscFunctionBegin; 1279acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1280a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1281acdf5bf4SSatish Balay } 1282acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 12833a40ed3dSBarry Smith PetscFunctionReturn(0); 1284acdf5bf4SSatish Balay } 1285acdf5bf4SSatish Balay 12865615d1e5SSatish Balay #undef __FUNC__ 12875615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1288ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 12895a838052SSatish Balay { 12905a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1291d64ed03dSBarry Smith 1292d64ed03dSBarry Smith PetscFunctionBegin; 12935a838052SSatish Balay *bs = baij->bs; 12943a40ed3dSBarry Smith PetscFunctionReturn(0); 12955a838052SSatish Balay } 12965a838052SSatish Balay 12975615d1e5SSatish Balay #undef __FUNC__ 12985615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1299ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 130058667388SSatish Balay { 130158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 130258667388SSatish Balay int ierr; 1303d64ed03dSBarry Smith 1304d64ed03dSBarry Smith PetscFunctionBegin; 130558667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 130658667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 13073a40ed3dSBarry Smith PetscFunctionReturn(0); 130858667388SSatish Balay } 13090ac07820SSatish Balay 13105615d1e5SSatish Balay #undef __FUNC__ 13115615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1312ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 13130ac07820SSatish Balay { 13144e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 13154e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 13167d57db60SLois Curfman McInnes int ierr; 13177d57db60SLois Curfman McInnes double isend[5], irecv[5]; 13180ac07820SSatish Balay 1319d64ed03dSBarry Smith PetscFunctionBegin; 13204e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 13214e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 13224e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 13234e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 13244e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 13250ac07820SSatish Balay if (flag == MAT_LOCAL) { 13264e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 13274e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 13284e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 13294e220ebcSLois Curfman McInnes info->memory = isend[3]; 13304e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 13310ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1332ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 13334e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13344e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13354e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13364e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13374e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13380ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1339ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 13404e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 13414e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 13424e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 13434e220ebcSLois Curfman McInnes info->memory = irecv[3]; 13444e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 13450ac07820SSatish Balay } 13465a5d4f66SBarry Smith info->rows_global = (double)a->M; 13475a5d4f66SBarry Smith info->columns_global = (double)a->N; 13485a5d4f66SBarry Smith info->rows_local = (double)a->m; 13495a5d4f66SBarry Smith info->columns_local = (double)a->N; 13504e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 13514e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 13524e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 13533a40ed3dSBarry Smith PetscFunctionReturn(0); 13540ac07820SSatish Balay } 13550ac07820SSatish Balay 13565615d1e5SSatish Balay #undef __FUNC__ 13575615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1358ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 135958667388SSatish Balay { 136058667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 136158667388SSatish Balay 1362d64ed03dSBarry Smith PetscFunctionBegin; 136358667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 136458667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 13656da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1366c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 136796854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1368c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1369b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1370b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1371b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1372aeafbbfcSLois Curfman McInnes a->roworiented = 1; 137358667388SSatish Balay MatSetOption(a->A,op); 137458667388SSatish Balay MatSetOption(a->B,op); 1375b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 13766da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 137758667388SSatish Balay op == MAT_SYMMETRIC || 137858667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 137958667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 138058667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 138158667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 138258667388SSatish Balay a->roworiented = 0; 138358667388SSatish Balay MatSetOption(a->A,op); 138458667388SSatish Balay MatSetOption(a->B,op); 13852b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 138690f02eecSBarry Smith a->donotstash = 1; 1387d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1388d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1389d64ed03dSBarry Smith } else { 1390d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1391d64ed03dSBarry Smith } 13923a40ed3dSBarry Smith PetscFunctionReturn(0); 139358667388SSatish Balay } 139458667388SSatish Balay 13955615d1e5SSatish Balay #undef __FUNC__ 13965615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1397ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 13980ac07820SSatish Balay { 13990ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 14000ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 14010ac07820SSatish Balay Mat B; 14020ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 14030ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 14040ac07820SSatish Balay Scalar *a; 14050ac07820SSatish Balay 1406d64ed03dSBarry Smith PetscFunctionBegin; 1407a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 14080ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 14090ac07820SSatish Balay CHKERRQ(ierr); 14100ac07820SSatish Balay 14110ac07820SSatish Balay /* copy over the A part */ 14120ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 14130ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14140ac07820SSatish Balay row = baij->rstart; 14150ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 14160ac07820SSatish Balay 14170ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14180ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14190ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14200ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14210ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 14220ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14230ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14240ac07820SSatish Balay col++; a += bs; 14250ac07820SSatish Balay } 14260ac07820SSatish Balay } 14270ac07820SSatish Balay } 14280ac07820SSatish Balay /* copy over the B part */ 14290ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 14300ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 14310ac07820SSatish Balay row = baij->rstart*bs; 14320ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 14330ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 14340ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 14350ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 14360ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 14370ac07820SSatish Balay for (k=0; k<bs; k++ ) { 14380ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 14390ac07820SSatish Balay col++; a += bs; 14400ac07820SSatish Balay } 14410ac07820SSatish Balay } 14420ac07820SSatish Balay } 14430ac07820SSatish Balay PetscFree(rvals); 14440ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14450ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 14460ac07820SSatish Balay 14470ac07820SSatish Balay if (matout != PETSC_NULL) { 14480ac07820SSatish Balay *matout = B; 14490ac07820SSatish Balay } else { 14500ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 14510ac07820SSatish Balay PetscFree(baij->rowners); 14520ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 14530ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 14540ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 14550ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 14560ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 14570ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 14580ac07820SSatish Balay PetscFree(baij); 1459f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 14600ac07820SSatish Balay PetscHeaderDestroy(B); 14610ac07820SSatish Balay } 14623a40ed3dSBarry Smith PetscFunctionReturn(0); 14630ac07820SSatish Balay } 14640e95ebc0SSatish Balay 14655615d1e5SSatish Balay #undef __FUNC__ 14665615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 14670e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 14680e95ebc0SSatish Balay { 14690e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 14700e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 14710e95ebc0SSatish Balay int ierr,s1,s2,s3; 14720e95ebc0SSatish Balay 1473d64ed03dSBarry Smith PetscFunctionBegin; 14740e95ebc0SSatish Balay if (ll) { 14750e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 14760e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1477a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 14780e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 14790e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 14800e95ebc0SSatish Balay } 1481a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 14823a40ed3dSBarry Smith PetscFunctionReturn(0); 14830e95ebc0SSatish Balay } 14840e95ebc0SSatish Balay 14855615d1e5SSatish Balay #undef __FUNC__ 14865615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1487ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 14880ac07820SSatish Balay { 14890ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 14900ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1491a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 14920ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 14930ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1494a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 14950ac07820SSatish Balay MPI_Comm comm = A->comm; 14960ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 14970ac07820SSatish Balay MPI_Status recv_status,*send_status; 14980ac07820SSatish Balay IS istmp; 14990ac07820SSatish Balay 1500d64ed03dSBarry Smith PetscFunctionBegin; 15010ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 15020ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 15030ac07820SSatish Balay 15040ac07820SSatish Balay /* first count number of contributors to each processor */ 15050ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 15060ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 15070ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 15080ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15090ac07820SSatish Balay idx = rows[i]; 15100ac07820SSatish Balay found = 0; 15110ac07820SSatish Balay for ( j=0; j<size; j++ ) { 15120ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 15130ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 15140ac07820SSatish Balay } 15150ac07820SSatish Balay } 1516a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 15170ac07820SSatish Balay } 15180ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 15190ac07820SSatish Balay 15200ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 15210ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1522ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 15230ac07820SSatish Balay nrecvs = work[rank]; 1524ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 15250ac07820SSatish Balay nmax = work[rank]; 15260ac07820SSatish Balay PetscFree(work); 15270ac07820SSatish Balay 15280ac07820SSatish Balay /* post receives: */ 1529d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1530d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 15310ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1532ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 15330ac07820SSatish Balay } 15340ac07820SSatish Balay 15350ac07820SSatish Balay /* do sends: 15360ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 15370ac07820SSatish Balay the ith processor 15380ac07820SSatish Balay */ 15390ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1540ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 15410ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 15420ac07820SSatish Balay starts[0] = 0; 15430ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15440ac07820SSatish Balay for ( i=0; i<N; i++ ) { 15450ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 15460ac07820SSatish Balay } 15470ac07820SSatish Balay ISRestoreIndices(is,&rows); 15480ac07820SSatish Balay 15490ac07820SSatish Balay starts[0] = 0; 15500ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 15510ac07820SSatish Balay count = 0; 15520ac07820SSatish Balay for ( i=0; i<size; i++ ) { 15530ac07820SSatish Balay if (procs[i]) { 1554ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 15550ac07820SSatish Balay } 15560ac07820SSatish Balay } 15570ac07820SSatish Balay PetscFree(starts); 15580ac07820SSatish Balay 15590ac07820SSatish Balay base = owners[rank]*bs; 15600ac07820SSatish Balay 15610ac07820SSatish Balay /* wait on receives */ 15620ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 15630ac07820SSatish Balay source = lens + nrecvs; 15640ac07820SSatish Balay count = nrecvs; slen = 0; 15650ac07820SSatish Balay while (count) { 1566ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 15670ac07820SSatish Balay /* unpack receives into our local space */ 1568ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 15690ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 15700ac07820SSatish Balay lens[imdex] = n; 15710ac07820SSatish Balay slen += n; 15720ac07820SSatish Balay count--; 15730ac07820SSatish Balay } 15740ac07820SSatish Balay PetscFree(recv_waits); 15750ac07820SSatish Balay 15760ac07820SSatish Balay /* move the data into the send scatter */ 15770ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 15780ac07820SSatish Balay count = 0; 15790ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 15800ac07820SSatish Balay values = rvalues + i*nmax; 15810ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 15820ac07820SSatish Balay lrows[count++] = values[j] - base; 15830ac07820SSatish Balay } 15840ac07820SSatish Balay } 15850ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 15860ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 15870ac07820SSatish Balay 15880ac07820SSatish Balay /* actually zap the local rows */ 1589029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 15900ac07820SSatish Balay PLogObjectParent(A,istmp); 1591a07cd24cSSatish Balay 1592a07cd24cSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 15930ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 15940ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 15950ac07820SSatish Balay 1596a07cd24cSSatish Balay if (diag) { 1597a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1598a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1599a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1600a07cd24cSSatish Balay } 1601a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1602a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1603a07cd24cSSatish Balay } 1604a07cd24cSSatish Balay PetscFree(lrows); 1605a07cd24cSSatish Balay 16060ac07820SSatish Balay /* wait on sends */ 16070ac07820SSatish Balay if (nsends) { 1608d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1609ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 16100ac07820SSatish Balay PetscFree(send_status); 16110ac07820SSatish Balay } 16120ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 16130ac07820SSatish Balay 16143a40ed3dSBarry Smith PetscFunctionReturn(0); 16150ac07820SSatish Balay } 1616ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 16175615d1e5SSatish Balay #undef __FUNC__ 16185615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1619ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1620ba4ca20aSSatish Balay { 1621ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 16223a40ed3dSBarry Smith int ierr; 1623ba4ca20aSSatish Balay 1624d64ed03dSBarry Smith PetscFunctionBegin; 16253a40ed3dSBarry Smith if (!a->rank) { 16263a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 16273a40ed3dSBarry Smith } 16283a40ed3dSBarry Smith PetscFunctionReturn(0); 1629ba4ca20aSSatish Balay } 16300ac07820SSatish Balay 16315615d1e5SSatish Balay #undef __FUNC__ 16325615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1633ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1634bb5a7306SBarry Smith { 1635bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1636bb5a7306SBarry Smith int ierr; 1637d64ed03dSBarry Smith 1638d64ed03dSBarry Smith PetscFunctionBegin; 1639bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 16403a40ed3dSBarry Smith PetscFunctionReturn(0); 1641bb5a7306SBarry Smith } 1642bb5a7306SBarry Smith 16430ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 16440ac07820SSatish Balay 164579bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 164679bdfe76SSatish Balay static struct _MatOps MatOps = { 1647bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 16484c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 16494c50302cSBarry Smith 0,0,0,0, 16500ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 16510e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 165258667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 16534c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 16544c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 16554c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 165694a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1657d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1658ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1659bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1660ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 166179bdfe76SSatish Balay 166279bdfe76SSatish Balay 16635615d1e5SSatish Balay #undef __FUNC__ 16645615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 166579bdfe76SSatish Balay /*@C 166679bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 166779bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 166879bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 166979bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 167079bdfe76SSatish Balay performance can be increased by more than a factor of 50. 167179bdfe76SSatish Balay 167279bdfe76SSatish Balay Input Parameters: 167379bdfe76SSatish Balay . comm - MPI communicator 167479bdfe76SSatish Balay . bs - size of blockk 167579bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 167692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 167792e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 167892e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 167992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 168092e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 168179bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 168292e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 168379bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 168479bdfe76SSatish Balay submatrix (same for all local rows) 168592e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 168692e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 168792e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 168892e8d321SLois Curfman McInnes it is zero. 168992e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 169079bdfe76SSatish Balay submatrix (same for all local rows). 169192e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 169292e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 169392e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 169479bdfe76SSatish Balay 169579bdfe76SSatish Balay Output Parameter: 169679bdfe76SSatish Balay . A - the matrix 169779bdfe76SSatish Balay 169879bdfe76SSatish Balay Notes: 169979bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 170079bdfe76SSatish Balay (possibly both). 170179bdfe76SSatish Balay 170279bdfe76SSatish Balay Storage Information: 170379bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 170479bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 170579bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 170679bdfe76SSatish Balay local matrix (a rectangular submatrix). 170779bdfe76SSatish Balay 170879bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 170979bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 171079bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 171179bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 171279bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 171379bdfe76SSatish Balay 171479bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 171579bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 171679bdfe76SSatish Balay 171779bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 171879bdfe76SSatish Balay $ ------------------- 171979bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 172079bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 172179bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 172279bdfe76SSatish Balay $ ------------------- 172379bdfe76SSatish Balay $ 172479bdfe76SSatish Balay 172579bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 172679bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 172779bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 172857b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 172979bdfe76SSatish Balay 1730d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1731d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 173279bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 173392e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 173492e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 17356da5968aSLois Curfman McInnes matrices. 173679bdfe76SSatish Balay 173792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 173879bdfe76SSatish Balay 173979bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 174079bdfe76SSatish Balay @*/ 174179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 174279bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 174379bdfe76SSatish Balay { 174479bdfe76SSatish Balay Mat B; 174579bdfe76SSatish Balay Mat_MPIBAIJ *b; 17464c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 174779bdfe76SSatish Balay 1748d64ed03dSBarry Smith PetscFunctionBegin; 1749a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 17503914022bSBarry Smith 17513914022bSBarry Smith MPI_Comm_size(comm,&size); 17523914022bSBarry Smith if (size == 1) { 17533914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 17543914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 17553914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 17563a40ed3dSBarry Smith PetscFunctionReturn(0); 17573914022bSBarry Smith } 17583914022bSBarry Smith 175979bdfe76SSatish Balay *A = 0; 1760d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 176179bdfe76SSatish Balay PLogObjectCreate(B); 176279bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 176379bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 176479bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 17654c50302cSBarry Smith 176679bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 176779bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 176890f02eecSBarry Smith B->mapping = 0; 176979bdfe76SSatish Balay B->factor = 0; 177079bdfe76SSatish Balay B->assembled = PETSC_FALSE; 177179bdfe76SSatish Balay 1772e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 177379bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 177479bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 177579bdfe76SSatish Balay 1776d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1777a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1778d64ed03dSBarry Smith } 1779a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1780a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1781a8c6a408SBarry Smith } 1782a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1783a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1784a8c6a408SBarry Smith } 1785cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1786cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 178779bdfe76SSatish Balay 178879bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 178979bdfe76SSatish Balay work[0] = m; work[1] = n; 179079bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 1791ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 179279bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 179379bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 179479bdfe76SSatish Balay } 179579bdfe76SSatish Balay if (m == PETSC_DECIDE) { 179679bdfe76SSatish Balay Mbs = M/bs; 1797a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 179879bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 179979bdfe76SSatish Balay m = mbs*bs; 180079bdfe76SSatish Balay } 180179bdfe76SSatish Balay if (n == PETSC_DECIDE) { 180279bdfe76SSatish Balay Nbs = N/bs; 1803a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 180479bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 180579bdfe76SSatish Balay n = nbs*bs; 180679bdfe76SSatish Balay } 1807a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 1808a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1809a8c6a408SBarry Smith } 181079bdfe76SSatish Balay 181179bdfe76SSatish Balay b->m = m; B->m = m; 181279bdfe76SSatish Balay b->n = n; B->n = n; 181379bdfe76SSatish Balay b->N = N; B->N = N; 181479bdfe76SSatish Balay b->M = M; B->M = M; 181579bdfe76SSatish Balay b->bs = bs; 181679bdfe76SSatish Balay b->bs2 = bs*bs; 181779bdfe76SSatish Balay b->mbs = mbs; 181879bdfe76SSatish Balay b->nbs = nbs; 181979bdfe76SSatish Balay b->Mbs = Mbs; 182079bdfe76SSatish Balay b->Nbs = Nbs; 182179bdfe76SSatish Balay 182279bdfe76SSatish Balay /* build local table of row and column ownerships */ 182379bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1824f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 18250ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 1826ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 182779bdfe76SSatish Balay b->rowners[0] = 0; 182879bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 182979bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 183079bdfe76SSatish Balay } 183179bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 183279bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 18334fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 18344fa0d573SSatish Balay b->rend_bs = b->rend * bs; 18354fa0d573SSatish Balay 1836ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 183779bdfe76SSatish Balay b->cowners[0] = 0; 183879bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 183979bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 184079bdfe76SSatish Balay } 184179bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 184279bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 18434fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 18444fa0d573SSatish Balay b->cend_bs = b->cend * bs; 184579bdfe76SSatish Balay 1846a07cd24cSSatish Balay 184779bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1848029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 184979bdfe76SSatish Balay PLogObjectParent(B,b->A); 185079bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1851029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 185279bdfe76SSatish Balay PLogObjectParent(B,b->B); 185379bdfe76SSatish Balay 185479bdfe76SSatish Balay /* build cache for off array entries formed */ 185579bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 185690f02eecSBarry Smith b->donotstash = 0; 185779bdfe76SSatish Balay b->colmap = 0; 185879bdfe76SSatish Balay b->garray = 0; 185979bdfe76SSatish Balay b->roworiented = 1; 186079bdfe76SSatish Balay 186130793edcSSatish Balay /* stuff used in block assembly */ 186230793edcSSatish Balay b->barray = 0; 186330793edcSSatish Balay 186479bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 186579bdfe76SSatish Balay b->lvec = 0; 186679bdfe76SSatish Balay b->Mvctx = 0; 186779bdfe76SSatish Balay 186879bdfe76SSatish Balay /* stuff for MatGetRow() */ 186979bdfe76SSatish Balay b->rowindices = 0; 187079bdfe76SSatish Balay b->rowvalues = 0; 187179bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 187279bdfe76SSatish Balay 1873a07cd24cSSatish Balay /* hash table stuff */ 1874a07cd24cSSatish Balay b->ht = 0; 18750bdbc534SSatish Balay b->ht_size = 0; 1876a07cd24cSSatish Balay 187779bdfe76SSatish Balay *A = B; 18783a40ed3dSBarry Smith PetscFunctionReturn(0); 187979bdfe76SSatish Balay } 1880026e39d0SSatish Balay 18815615d1e5SSatish Balay #undef __FUNC__ 18825615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 18830ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 18840ac07820SSatish Balay { 18850ac07820SSatish Balay Mat mat; 18860ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 18870ac07820SSatish Balay int ierr, len=0, flg; 18880ac07820SSatish Balay 1889d64ed03dSBarry Smith PetscFunctionBegin; 18900ac07820SSatish Balay *newmat = 0; 1891d4bb536fSBarry Smith PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 18920ac07820SSatish Balay PLogObjectCreate(mat); 18930ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 18940ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 18950ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 18960ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 18970ac07820SSatish Balay mat->factor = matin->factor; 18980ac07820SSatish Balay mat->assembled = PETSC_TRUE; 18990ac07820SSatish Balay 19000ac07820SSatish Balay a->m = mat->m = oldmat->m; 19010ac07820SSatish Balay a->n = mat->n = oldmat->n; 19020ac07820SSatish Balay a->M = mat->M = oldmat->M; 19030ac07820SSatish Balay a->N = mat->N = oldmat->N; 19040ac07820SSatish Balay 19050ac07820SSatish Balay a->bs = oldmat->bs; 19060ac07820SSatish Balay a->bs2 = oldmat->bs2; 19070ac07820SSatish Balay a->mbs = oldmat->mbs; 19080ac07820SSatish Balay a->nbs = oldmat->nbs; 19090ac07820SSatish Balay a->Mbs = oldmat->Mbs; 19100ac07820SSatish Balay a->Nbs = oldmat->Nbs; 19110ac07820SSatish Balay 19120ac07820SSatish Balay a->rstart = oldmat->rstart; 19130ac07820SSatish Balay a->rend = oldmat->rend; 19140ac07820SSatish Balay a->cstart = oldmat->cstart; 19150ac07820SSatish Balay a->cend = oldmat->cend; 19160ac07820SSatish Balay a->size = oldmat->size; 19170ac07820SSatish Balay a->rank = oldmat->rank; 1918e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 19190ac07820SSatish Balay a->rowvalues = 0; 19200ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 192130793edcSSatish Balay a->barray = 0; 19220ac07820SSatish Balay 19230ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1924f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 19250ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 19260ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 19270ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 19280ac07820SSatish Balay if (oldmat->colmap) { 19290ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 19300ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 19310ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 19320ac07820SSatish Balay } else a->colmap = 0; 19330ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 19340ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 19350ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 19360ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 19370ac07820SSatish Balay } else a->garray = 0; 19380ac07820SSatish Balay 19390ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 19400ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 19410ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 19420ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 19430ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 19440ac07820SSatish Balay PLogObjectParent(mat,a->A); 19450ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 19460ac07820SSatish Balay PLogObjectParent(mat,a->B); 19470ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 19480ac07820SSatish Balay if (flg) { 19490ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 19500ac07820SSatish Balay } 19510ac07820SSatish Balay *newmat = mat; 19523a40ed3dSBarry Smith PetscFunctionReturn(0); 19530ac07820SSatish Balay } 195457b952d6SSatish Balay 195557b952d6SSatish Balay #include "sys.h" 195657b952d6SSatish Balay 19575615d1e5SSatish Balay #undef __FUNC__ 19585615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 195957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 196057b952d6SSatish Balay { 196157b952d6SSatish Balay Mat A; 196257b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 196357b952d6SSatish Balay Scalar *vals,*buf; 196457b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 196557b952d6SSatish Balay MPI_Status status; 1966cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 196757b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 196857b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 196957b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 197057b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 197157b952d6SSatish Balay 1972d64ed03dSBarry Smith PetscFunctionBegin; 197357b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 197457b952d6SSatish Balay bs2 = bs*bs; 197557b952d6SSatish Balay 197657b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 197757b952d6SSatish Balay if (!rank) { 197857b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1979e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1980a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1981d64ed03dSBarry Smith if (header[3] < 0) { 1982a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 1983d64ed03dSBarry Smith } 19846c5fab8fSBarry Smith } 1985d64ed03dSBarry Smith 1986ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 198757b952d6SSatish Balay M = header[1]; N = header[2]; 198857b952d6SSatish Balay 1989a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 199057b952d6SSatish Balay 199157b952d6SSatish Balay /* 199257b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 199357b952d6SSatish Balay divisible by the blocksize 199457b952d6SSatish Balay */ 199557b952d6SSatish Balay Mbs = M/bs; 199657b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 199757b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 199857b952d6SSatish Balay else Mbs++; 199957b952d6SSatish Balay if (extra_rows &&!rank) { 2000b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 200157b952d6SSatish Balay } 2002537820f0SBarry Smith 200357b952d6SSatish Balay /* determine ownership of all rows */ 200457b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 200557b952d6SSatish Balay m = mbs * bs; 2006cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2007cee3aa6bSSatish Balay browners = rowners + size + 1; 2008ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 200957b952d6SSatish Balay rowners[0] = 0; 2010cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2011cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 201257b952d6SSatish Balay rstart = rowners[rank]; 201357b952d6SSatish Balay rend = rowners[rank+1]; 201457b952d6SSatish Balay 201557b952d6SSatish Balay /* distribute row lengths to all processors */ 201657b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 201757b952d6SSatish Balay if (!rank) { 201857b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2019e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 202057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 202157b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2022cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2023ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 202457b952d6SSatish Balay PetscFree(sndcounts); 2025d64ed03dSBarry Smith } else { 2026ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 202757b952d6SSatish Balay } 202857b952d6SSatish Balay 202957b952d6SSatish Balay if (!rank) { 203057b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 203157b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 203257b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 203357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 203457b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 203557b952d6SSatish Balay procsnz[i] += rowlengths[j]; 203657b952d6SSatish Balay } 203757b952d6SSatish Balay } 203857b952d6SSatish Balay PetscFree(rowlengths); 203957b952d6SSatish Balay 204057b952d6SSatish Balay /* determine max buffer needed and allocate it */ 204157b952d6SSatish Balay maxnz = 0; 204257b952d6SSatish Balay for ( i=0; i<size; i++ ) { 204357b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 204457b952d6SSatish Balay } 204557b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 204657b952d6SSatish Balay 204757b952d6SSatish Balay /* read in my part of the matrix column indices */ 204857b952d6SSatish Balay nz = procsnz[0]; 204957b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 205057b952d6SSatish Balay mycols = ibuf; 2051cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2052e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2053cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2054cee3aa6bSSatish Balay 205557b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 205657b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 205757b952d6SSatish Balay nz = procsnz[i]; 2058e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2059ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 206057b952d6SSatish Balay } 206157b952d6SSatish Balay /* read in the stuff for the last proc */ 206257b952d6SSatish Balay if ( size != 1 ) { 206357b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2064e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 206557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2066ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 206757b952d6SSatish Balay } 206857b952d6SSatish Balay PetscFree(cols); 2069d64ed03dSBarry Smith } else { 207057b952d6SSatish Balay /* determine buffer space needed for message */ 207157b952d6SSatish Balay nz = 0; 207257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 207357b952d6SSatish Balay nz += locrowlens[i]; 207457b952d6SSatish Balay } 207557b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 207657b952d6SSatish Balay mycols = ibuf; 207757b952d6SSatish Balay /* receive message of column indices*/ 2078ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2079ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2080a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 208157b952d6SSatish Balay } 208257b952d6SSatish Balay 208357b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2084cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2085cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 208657b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2087cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 208857b952d6SSatish Balay masked1 = mask + Mbs; 208957b952d6SSatish Balay masked2 = masked1 + Mbs; 209057b952d6SSatish Balay rowcount = 0; nzcount = 0; 209157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 209257b952d6SSatish Balay dcount = 0; 209357b952d6SSatish Balay odcount = 0; 209457b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 209557b952d6SSatish Balay kmax = locrowlens[rowcount]; 209657b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 209757b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 209857b952d6SSatish Balay if (!mask[tmp]) { 209957b952d6SSatish Balay mask[tmp] = 1; 210057b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 210157b952d6SSatish Balay else masked1[dcount++] = tmp; 210257b952d6SSatish Balay } 210357b952d6SSatish Balay } 210457b952d6SSatish Balay rowcount++; 210557b952d6SSatish Balay } 2106cee3aa6bSSatish Balay 210757b952d6SSatish Balay dlens[i] = dcount; 210857b952d6SSatish Balay odlens[i] = odcount; 2109cee3aa6bSSatish Balay 211057b952d6SSatish Balay /* zero out the mask elements we set */ 211157b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 211257b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 211357b952d6SSatish Balay } 2114cee3aa6bSSatish Balay 211557b952d6SSatish Balay /* create our matrix */ 2116537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2117537820f0SBarry Smith CHKERRQ(ierr); 211857b952d6SSatish Balay A = *newmat; 21196d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 212057b952d6SSatish Balay 212157b952d6SSatish Balay if (!rank) { 212257b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 212357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 212457b952d6SSatish Balay nz = procsnz[0]; 212557b952d6SSatish Balay vals = buf; 2126cee3aa6bSSatish Balay mycols = ibuf; 2127cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2128e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2129cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2130537820f0SBarry Smith 213157b952d6SSatish Balay /* insert into matrix */ 213257b952d6SSatish Balay jj = rstart*bs; 213357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 213457b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 213557b952d6SSatish Balay mycols += locrowlens[i]; 213657b952d6SSatish Balay vals += locrowlens[i]; 213757b952d6SSatish Balay jj++; 213857b952d6SSatish Balay } 213957b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 214057b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 214157b952d6SSatish Balay nz = procsnz[i]; 214257b952d6SSatish Balay vals = buf; 2143e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2144ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 214557b952d6SSatish Balay } 214657b952d6SSatish Balay /* the last proc */ 214757b952d6SSatish Balay if ( size != 1 ){ 214857b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2149cee3aa6bSSatish Balay vals = buf; 2150e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 215157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2152ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 215357b952d6SSatish Balay } 215457b952d6SSatish Balay PetscFree(procsnz); 2155d64ed03dSBarry Smith } else { 215657b952d6SSatish Balay /* receive numeric values */ 215757b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 215857b952d6SSatish Balay 215957b952d6SSatish Balay /* receive message of values*/ 216057b952d6SSatish Balay vals = buf; 2161cee3aa6bSSatish Balay mycols = ibuf; 2162ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2163ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2164a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 216557b952d6SSatish Balay 216657b952d6SSatish Balay /* insert into matrix */ 216757b952d6SSatish Balay jj = rstart*bs; 2168cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 216957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 217057b952d6SSatish Balay mycols += locrowlens[i]; 217157b952d6SSatish Balay vals += locrowlens[i]; 217257b952d6SSatish Balay jj++; 217357b952d6SSatish Balay } 217457b952d6SSatish Balay } 217557b952d6SSatish Balay PetscFree(locrowlens); 217657b952d6SSatish Balay PetscFree(buf); 217757b952d6SSatish Balay PetscFree(ibuf); 217857b952d6SSatish Balay PetscFree(rowners); 217957b952d6SSatish Balay PetscFree(dlens); 2180cee3aa6bSSatish Balay PetscFree(mask); 21816d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 21826d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 21833a40ed3dSBarry Smith PetscFunctionReturn(0); 218457b952d6SSatish Balay } 218557b952d6SSatish Balay 218657b952d6SSatish Balay 2187