1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*a07cd24cSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.91 1997/12/01 01:55:07 bsmith Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 53369ce9aSBarry Smith #include "pinclude/pviewer.h" 670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 879bdfe76SSatish Balay 957b952d6SSatish Balay 1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 143b2fbd54SBarry Smith 15537820f0SBarry Smith /* 16537820f0SBarry Smith Local utility routine that creates a mapping from the global column 1757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1957b952d6SSatish Balay length of colmap equals the global matrix length. 2057b952d6SSatish Balay */ 215615d1e5SSatish Balay #undef __FUNC__ 225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2457b952d6SSatish Balay { 2557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27928fc39bSSatish Balay int nbs = B->nbs,i,bs=B->bs;; 2857b952d6SSatish Balay 29d64ed03dSBarry Smith PetscFunctionBegin; 30758f045eSSatish Balay baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 3157b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3257b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33928fc39bSSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 343a40ed3dSBarry Smith PetscFunctionReturn(0); 3557b952d6SSatish Balay } 3657b952d6SSatish Balay 3780c1aa95SSatish Balay #define CHUNKSIZE 10 3880c1aa95SSatish Balay 39f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 4080c1aa95SSatish Balay { \ 4180c1aa95SSatish Balay \ 4280c1aa95SSatish Balay brow = row/bs; \ 4380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 4580c1aa95SSatish Balay bcol = col/bs; \ 4680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 47ab26458aSBarry Smith low = 0; high = nrow; \ 48ab26458aSBarry Smith while (high-low > 3) { \ 49ab26458aSBarry Smith t = (low+high)/2; \ 50ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 51ab26458aSBarry Smith else low = t; \ 52ab26458aSBarry Smith } \ 53ab26458aSBarry Smith for ( _i=low; _i<high; _i++ ) { \ 5480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 5580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 5680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 57eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 58eada6651SSatish Balay else *bap = value; \ 59ac7a638eSSatish Balay goto a_noinsert; \ 6080c1aa95SSatish Balay } \ 6180c1aa95SSatish Balay } \ 6289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 63a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 6480c1aa95SSatish Balay if (nrow >= rmax) { \ 6580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 6680c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 6780c1aa95SSatish Balay Scalar *new_a; \ 6880c1aa95SSatish Balay \ 69a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 7089280ab3SLois Curfman McInnes \ 7180c1aa95SSatish Balay /* malloc new storage space */ \ 7280c1aa95SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 7380c1aa95SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 7480c1aa95SSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 7580c1aa95SSatish Balay new_i = new_j + new_nz; \ 7680c1aa95SSatish Balay \ 7780c1aa95SSatish Balay /* copy over old data into new slots */ \ 7880c1aa95SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 7980c1aa95SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 8080c1aa95SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 8180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 8280c1aa95SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 8380c1aa95SSatish Balay len*sizeof(int)); \ 8480c1aa95SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 8580c1aa95SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 8680c1aa95SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 8780c1aa95SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 8880c1aa95SSatish Balay /* free up old matrix storage */ \ 8980c1aa95SSatish Balay PetscFree(a->a); \ 9080c1aa95SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 9180c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 9280c1aa95SSatish Balay a->singlemalloc = 1; \ 9380c1aa95SSatish Balay \ 9480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 9680c1aa95SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 9780c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 9880c1aa95SSatish Balay a->reallocs++; \ 9980c1aa95SSatish Balay a->nz++; \ 10080c1aa95SSatish Balay } \ 10180c1aa95SSatish Balay N = nrow++ - 1; \ 10280c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 10380c1aa95SSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 10480c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 10580c1aa95SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 10680c1aa95SSatish Balay } \ 10780c1aa95SSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 10880c1aa95SSatish Balay rp[_i] = bcol; \ 10980c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 110ac7a638eSSatish Balay a_noinsert:; \ 11180c1aa95SSatish Balay ailen[brow] = nrow; \ 11280c1aa95SSatish Balay } 11357b952d6SSatish Balay 114ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115ac7a638eSSatish Balay { \ 116ac7a638eSSatish Balay \ 117ac7a638eSSatish Balay brow = row/bs; \ 118ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 120ac7a638eSSatish Balay bcol = col/bs; \ 121ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 122ac7a638eSSatish Balay low = 0; high = nrow; \ 123ac7a638eSSatish Balay while (high-low > 3) { \ 124ac7a638eSSatish Balay t = (low+high)/2; \ 125ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 126ac7a638eSSatish Balay else low = t; \ 127ac7a638eSSatish Balay } \ 128ac7a638eSSatish Balay for ( _i=low; _i<high; _i++ ) { \ 129ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 130ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 131ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 132ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 133ac7a638eSSatish Balay else *bap = value; \ 134ac7a638eSSatish Balay goto b_noinsert; \ 135ac7a638eSSatish Balay } \ 136ac7a638eSSatish Balay } \ 13789280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 138a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139ac7a638eSSatish Balay if (nrow >= rmax) { \ 140ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 14174c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142ac7a638eSSatish Balay Scalar *new_a; \ 143ac7a638eSSatish Balay \ 144a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 14589280ab3SLois Curfman McInnes \ 146ac7a638eSSatish Balay /* malloc new storage space */ \ 14774c639caSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148ac7a638eSSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149ac7a638eSSatish Balay new_j = (int *) (new_a + bs2*new_nz); \ 150ac7a638eSSatish Balay new_i = new_j + new_nz; \ 151ac7a638eSSatish Balay \ 152ac7a638eSSatish Balay /* copy over old data into new slots */ \ 153ac7a638eSSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 15474c639caSSatish Balay for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155ac7a638eSSatish Balay PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157ac7a638eSSatish Balay PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158ac7a638eSSatish Balay len*sizeof(int)); \ 159ac7a638eSSatish Balay PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160ac7a638eSSatish Balay PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161ac7a638eSSatish Balay PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162ac7a638eSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163ac7a638eSSatish Balay /* free up old matrix storage */ \ 16474c639caSSatish Balay PetscFree(b->a); \ 16574c639caSSatish Balay if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 16674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 16774c639caSSatish Balay b->singlemalloc = 1; \ 168ac7a638eSSatish Balay \ 169ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 17174c639caSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 17274c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 17374c639caSSatish Balay b->reallocs++; \ 17474c639caSSatish Balay b->nz++; \ 175ac7a638eSSatish Balay } \ 176ac7a638eSSatish Balay N = nrow++ - 1; \ 177ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 178ac7a638eSSatish Balay for ( ii=N; ii>=_i; ii-- ) { \ 179ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 180ac7a638eSSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183ac7a638eSSatish Balay rp[_i] = bcol; \ 184ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 185ac7a638eSSatish Balay b_noinsert:; \ 186ac7a638eSSatish Balay bilen[brow] = nrow; \ 187ac7a638eSSatish Balay } 188ac7a638eSSatish Balay 1895615d1e5SSatish Balay #undef __FUNC__ 1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ" 191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 19257b952d6SSatish Balay { 19357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 19457b952d6SSatish Balay Scalar value; 1954fa0d573SSatish Balay int ierr,i,j,row,col; 1964fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 1974fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 1984fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 19957b952d6SSatish Balay 200eada6651SSatish Balay /* Some Variables required in the macro */ 20180c1aa95SSatish Balay Mat A = baij->A; 20280c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204ac7a638eSSatish Balay Scalar *aa=a->a; 205ac7a638eSSatish Balay 206ac7a638eSSatish Balay Mat B = baij->B; 207ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209ac7a638eSSatish Balay Scalar *ba=b->a; 210ac7a638eSSatish Balay 211ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 213ac7a638eSSatish Balay Scalar *ap,*bap; 21480c1aa95SSatish Balay 215d64ed03dSBarry Smith PetscFunctionBegin; 21657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 218a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220639f9d9dSBarry Smith #endif 22157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 22257b952d6SSatish Balay row = im[i] - rstart_orig; 22357b952d6SSatish Balay for ( j=0; j<n; j++ ) { 22457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 22557b952d6SSatish Balay col = in[j] - cstart_orig; 22657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 22880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 22957b952d6SSatish Balay } 2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 231a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233639f9d9dSBarry Smith #endif 23457b952d6SSatish Balay else { 23557b952d6SSatish Balay if (mat->was_assembled) { 236905e6a2fSBarry Smith if (!baij->colmap) { 237905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238905e6a2fSBarry Smith } 239905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 24057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 24157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 24257b952d6SSatish Balay col = in[j]; 2439bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2449bf004c3SSatish Balay B = baij->B; 2459bf004c3SSatish Balay b = (Mat_SeqBAIJ *) (B)->data; 2469bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2479bf004c3SSatish Balay ba=b->a; 24857b952d6SSatish Balay } 249d64ed03dSBarry Smith } else col = in[j]; 25057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25357b952d6SSatish Balay } 25457b952d6SSatish Balay } 255d64ed03dSBarry Smith } else { 25690f02eecSBarry Smith if (roworiented && !baij->donotstash) { 25757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258d64ed03dSBarry Smith } else { 25990f02eecSBarry Smith if (!baij->donotstash) { 26057b952d6SSatish Balay row = im[i]; 26157b952d6SSatish Balay for ( j=0; j<n; j++ ) { 26257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 26357b952d6SSatish Balay } 26457b952d6SSatish Balay } 26557b952d6SSatish Balay } 26657b952d6SSatish Balay } 26790f02eecSBarry Smith } 2683a40ed3dSBarry Smith PetscFunctionReturn(0); 26957b952d6SSatish Balay } 27057b952d6SSatish Balay 271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272ab26458aSBarry Smith #undef __FUNC__ 273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275ab26458aSBarry Smith { 276ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27730793edcSSatish Balay Scalar *value,*barray=baij->barray; 278abef11f7SSatish Balay int ierr,i,j,ii,jj,row,col,k,l; 279ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 280ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 281ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282ab26458aSBarry Smith 28330793edcSSatish Balay if(!barray) { 28447513183SBarry Smith baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 28530793edcSSatish Balay } 28630793edcSSatish Balay 287ab26458aSBarry Smith if (roworiented) { 288ab26458aSBarry Smith stepval = (n-1)*bs; 289ab26458aSBarry Smith } else { 290ab26458aSBarry Smith stepval = (m-1)*bs; 291ab26458aSBarry Smith } 292ab26458aSBarry Smith for ( i=0; i<m; i++ ) { 2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 294a8c6a408SBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295a8c6a408SBarry Smith if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296ab26458aSBarry Smith #endif 297ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 298ab26458aSBarry Smith row = im[i] - rstart; 299ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 30015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 30115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 30215b57d14SSatish Balay barray = v + i*bs2; 30315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 30415b57d14SSatish Balay barray = v + j*bs2; 30515b57d14SSatish Balay } else { /* Here a copy is required */ 306ab26458aSBarry Smith if (roworiented) { 307ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 308ab26458aSBarry Smith } else { 309ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 310abef11f7SSatish Balay } 31147513183SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 31247513183SBarry Smith for (jj=0; jj<bs; jj++ ) { 31330793edcSSatish Balay *barray++ = *value++; 31447513183SBarry Smith } 31547513183SBarry Smith } 31630793edcSSatish Balay barray -=bs2; 31715b57d14SSatish Balay } 318abef11f7SSatish Balay 319abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 320abef11f7SSatish Balay col = in[j] - cstart; 32130793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322ab26458aSBarry Smith } 3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 324a8c6a408SBarry Smith else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325a8c6a408SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326ab26458aSBarry Smith #endif 327ab26458aSBarry Smith else { 328ab26458aSBarry Smith if (mat->was_assembled) { 329ab26458aSBarry Smith if (!baij->colmap) { 330ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331ab26458aSBarry Smith } 332a5eb4965SSatish Balay 3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 334a8c6a408SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335a5eb4965SSatish Balay #endif 336a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 337ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339ab26458aSBarry Smith col = in[j]; 340ab26458aSBarry Smith } 341ab26458aSBarry Smith } 342ab26458aSBarry Smith else col = in[j]; 34330793edcSSatish Balay ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344ab26458aSBarry Smith } 345ab26458aSBarry Smith } 346d64ed03dSBarry Smith } else { 347ab26458aSBarry Smith if (!baij->donotstash) { 348ab26458aSBarry Smith if (roworiented ) { 349abef11f7SSatish Balay row = im[i]*bs; 350abef11f7SSatish Balay value = v + i*(stepval+bs)*bs; 351abef11f7SSatish Balay for ( j=0; j<bs; j++,row++ ) { 352abef11f7SSatish Balay for ( k=0; k<n; k++ ) { 353abef11f7SSatish Balay for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355abef11f7SSatish Balay } 356ab26458aSBarry Smith } 357ab26458aSBarry Smith } 358d64ed03dSBarry Smith } else { 359ab26458aSBarry Smith for ( j=0; j<n; j++ ) { 360abef11f7SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 361abef11f7SSatish Balay col = in[j]*bs; 362abef11f7SSatish Balay for ( k=0; k<bs; k++,col++,value+=stepval) { 363abef11f7SSatish Balay for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364abef11f7SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365ab26458aSBarry Smith } 366ab26458aSBarry Smith } 367ab26458aSBarry Smith } 368abef11f7SSatish Balay } 369abef11f7SSatish Balay } 370ab26458aSBarry Smith } 371ab26458aSBarry Smith } 3723a40ed3dSBarry Smith PetscFunctionReturn(0); 373ab26458aSBarry Smith } 374ab26458aSBarry Smith 3755615d1e5SSatish Balay #undef __FUNC__ 3765615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ" 377ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 378d6de1c52SSatish Balay { 379d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 380d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 381d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 382d6de1c52SSatish Balay 383d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 384a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 385a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 386d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 387d6de1c52SSatish Balay row = idxm[i] - bsrstart; 388d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 389a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 390a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 391d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 392d6de1c52SSatish Balay col = idxn[j] - bscstart; 393d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 394d64ed03dSBarry Smith } else { 395905e6a2fSBarry Smith if (!baij->colmap) { 396905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 397905e6a2fSBarry Smith } 398e60e1c95SSatish Balay if((baij->colmap[idxn[j]/bs]-1 < 0) || 399dcb20de4SSatish Balay (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 400d9d09a02SSatish Balay else { 401dcb20de4SSatish Balay col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 402d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 403d6de1c52SSatish Balay } 404d6de1c52SSatish Balay } 405d6de1c52SSatish Balay } 406d64ed03dSBarry Smith } else { 407a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 408d6de1c52SSatish Balay } 409d6de1c52SSatish Balay } 4103a40ed3dSBarry Smith PetscFunctionReturn(0); 411d6de1c52SSatish Balay } 412d6de1c52SSatish Balay 4135615d1e5SSatish Balay #undef __FUNC__ 4145615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ" 415ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 416d6de1c52SSatish Balay { 417d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 418d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 419acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 420d6de1c52SSatish Balay double sum = 0.0; 421d6de1c52SSatish Balay Scalar *v; 422d6de1c52SSatish Balay 423d64ed03dSBarry Smith PetscFunctionBegin; 424d6de1c52SSatish Balay if (baij->size == 1) { 425d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 426d6de1c52SSatish Balay } else { 427d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 428d6de1c52SSatish Balay v = amat->a; 429d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 4303a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 431d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 432d6de1c52SSatish Balay #else 433d6de1c52SSatish Balay sum += (*v)*(*v); v++; 434d6de1c52SSatish Balay #endif 435d6de1c52SSatish Balay } 436d6de1c52SSatish Balay v = bmat->a; 437d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 4383a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 439d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 440d6de1c52SSatish Balay #else 441d6de1c52SSatish Balay sum += (*v)*(*v); v++; 442d6de1c52SSatish Balay #endif 443d6de1c52SSatish Balay } 444ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 445d6de1c52SSatish Balay *norm = sqrt(*norm); 446d64ed03dSBarry Smith } else { 447e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 448d6de1c52SSatish Balay } 449d64ed03dSBarry Smith } 4503a40ed3dSBarry Smith PetscFunctionReturn(0); 451d6de1c52SSatish Balay } 45257b952d6SSatish Balay 4535615d1e5SSatish Balay #undef __FUNC__ 4545615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 455ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 45657b952d6SSatish Balay { 45757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 45857b952d6SSatish Balay MPI_Comm comm = mat->comm; 45957b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 46057b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 46157b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 46257b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 46357b952d6SSatish Balay InsertMode addv; 46457b952d6SSatish Balay Scalar *rvalues,*svalues; 46557b952d6SSatish Balay 466d64ed03dSBarry Smith PetscFunctionBegin; 46757b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 468ca161407SBarry Smith ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 46957b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 470a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 47157b952d6SSatish Balay } 472e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 47357b952d6SSatish Balay 47457b952d6SSatish Balay /* first count number of contributors to each processor */ 47557b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 47657b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 47757b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 47857b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 47957b952d6SSatish Balay idx = baij->stash.idx[i]; 48057b952d6SSatish Balay for ( j=0; j<size; j++ ) { 48157b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 48257b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 48357b952d6SSatish Balay } 48457b952d6SSatish Balay } 48557b952d6SSatish Balay } 48657b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 48757b952d6SSatish Balay 48857b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 48957b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 490ca161407SBarry Smith ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 49157b952d6SSatish Balay nreceives = work[rank]; 492ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 49357b952d6SSatish Balay nmax = work[rank]; 49457b952d6SSatish Balay PetscFree(work); 49557b952d6SSatish Balay 49657b952d6SSatish Balay /* post receives: 49757b952d6SSatish Balay 1) each message will consist of ordered pairs 49857b952d6SSatish Balay (global index,value) we store the global index as a double 49957b952d6SSatish Balay to simplify the message passing. 50057b952d6SSatish Balay 2) since we don't know how long each individual message is we 50157b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 50257b952d6SSatish Balay this is a lot of wasted space. 50357b952d6SSatish Balay 50457b952d6SSatish Balay 50557b952d6SSatish Balay This could be done better. 50657b952d6SSatish Balay */ 507f8abc2e8SBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 508f8abc2e8SBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 50957b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 510ca161407SBarry Smith ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 51157b952d6SSatish Balay } 51257b952d6SSatish Balay 51357b952d6SSatish Balay /* do sends: 51457b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 51557b952d6SSatish Balay the ith processor 51657b952d6SSatish Balay */ 51757b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 518d64ed03dSBarry Smith send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 51957b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 52057b952d6SSatish Balay starts[0] = 0; 52157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 52257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 52357b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 52457b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 52557b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 52657b952d6SSatish Balay } 52757b952d6SSatish Balay PetscFree(owner); 52857b952d6SSatish Balay starts[0] = 0; 52957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 53057b952d6SSatish Balay count = 0; 53157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 53257b952d6SSatish Balay if (procs[i]) { 533ca161407SBarry Smith ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 53457b952d6SSatish Balay } 53557b952d6SSatish Balay } 53657b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 53757b952d6SSatish Balay 53857b952d6SSatish Balay /* Free cache space */ 539cd1fa5fbSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 54057b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 54157b952d6SSatish Balay 54257b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 54357b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 54457b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 54557b952d6SSatish Balay baij->rmax = nmax; 54657b952d6SSatish Balay 5473a40ed3dSBarry Smith PetscFunctionReturn(0); 54857b952d6SSatish Balay } 549596b8d2eSBarry Smith #include <math.h> 550596b8d2eSBarry Smith #define HASH_KEY 0.6180339887 551bd7f49f5SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 552bd7f49f5SSatish Balay #define HASH2(size,key) ((int)((size)*fmod(((key+0.5)*HASH_KEY),1))) 55357b952d6SSatish Balay 554bd7f49f5SSatish Balay 555bd7f49f5SSatish Balay int CreateHashTable(Mat mat,double factor) 556596b8d2eSBarry Smith { 557596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 558596b8d2eSBarry Smith Mat A = baij->A, B=baij->B; 559596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 560bd7f49f5SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,h2,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 561bd7f49f5SSatish Balay int size=(int)(factor*nz),ct=0,max1=0,max2=0; 562*a07cd24cSSatish Balay double *HT,key; 563*a07cd24cSSatish Balay extern int PetscGlobalRank; 564*a07cd24cSSatish Balay /* 565*a07cd24cSSatish Balay Scalar *aa=a->a,*ba=b->a; 566596b8d2eSBarry Smith static double *HT; 567596b8d2eSBarry Smith static int flag=1; 568*a07cd24cSSatish Balay */ 569d64ed03dSBarry Smith 570d64ed03dSBarry Smith PetscFunctionBegin; 571596b8d2eSBarry Smith /* Allocate Memory for Hash Table */ 572*a07cd24cSSatish Balay if (!baij->ht) { 573*a07cd24cSSatish Balay baij->ht = (double*)PetscMalloc(size*sizeof(double)); CHKPTRQ(baij->ht); 574596b8d2eSBarry Smith } 575*a07cd24cSSatish Balay HT = baij->ht; 576596b8d2eSBarry Smith PetscMemzero(HT,size*sizeof(double)); 577596b8d2eSBarry Smith 578596b8d2eSBarry Smith /* Loop Over A */ 579596b8d2eSBarry Smith for ( i=0; i<a->n; i++ ) { 580596b8d2eSBarry Smith for ( j=ai[i]; j<ai[i+1]; j++ ) { 581596b8d2eSBarry Smith key = i*baij->n+aj[j]+1; 582596b8d2eSBarry Smith h1 = HASH1(size,key); 583bd7f49f5SSatish Balay h2 = HASH2(size,key); 584596b8d2eSBarry Smith 585596b8d2eSBarry Smith for ( k=1; k<size; k++ ){ 586596b8d2eSBarry Smith if (HT[(h1*k)%size] == 0.0) { 587596b8d2eSBarry Smith HT[(h1*k)%size] = key; 588596b8d2eSBarry Smith break; 589596b8d2eSBarry Smith } else ct++; 590596b8d2eSBarry Smith } 591bd7f49f5SSatish Balay if (k> max1) max1 = k; 592596b8d2eSBarry Smith } 593596b8d2eSBarry Smith } 594596b8d2eSBarry Smith /* Loop Over B */ 595596b8d2eSBarry Smith for ( i=0; i<b->n; i++ ) { 596596b8d2eSBarry Smith for ( j=bi[i]; j<bi[i+1]; j++ ) { 597596b8d2eSBarry Smith key = i*b->n+bj[j]+1; 598596b8d2eSBarry Smith h1 = HASH1(size,key); 599596b8d2eSBarry Smith for ( k=1; k<size; k++ ){ 600596b8d2eSBarry Smith if (HT[(h1*k)%size] == 0.0) { 601596b8d2eSBarry Smith HT[(h1*k)%size] = key; 602596b8d2eSBarry Smith break; 603596b8d2eSBarry Smith } else ct++; 604596b8d2eSBarry Smith } 605bd7f49f5SSatish Balay if (k> max2) max2 = k; 606596b8d2eSBarry Smith } 607596b8d2eSBarry Smith } 608596b8d2eSBarry Smith 609596b8d2eSBarry Smith /* Print Summary */ 610596b8d2eSBarry Smith for ( i=0,key=0.0,j=0; i<size; i++) 611596b8d2eSBarry Smith if (HT[i]) {j++;} 612596b8d2eSBarry Smith 613bd7f49f5SSatish Balay printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 614bd7f49f5SSatish Balay PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j)/j,j); 615bd7f49f5SSatish Balay PetscFree(HT); 6163a40ed3dSBarry Smith PetscFunctionReturn(0); 617596b8d2eSBarry Smith } 61857b952d6SSatish Balay 6195615d1e5SSatish Balay #undef __FUNC__ 6205615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 621ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 62257b952d6SSatish Balay { 62357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 62457b952d6SSatish Balay MPI_Status *send_status,recv_status; 62557b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 626596b8d2eSBarry Smith int bs=baij->bs,row,col,other_disassembled,flg; 62757b952d6SSatish Balay Scalar *values,val; 628e0fa3b82SLois Curfman McInnes InsertMode addv = mat->insertmode; 62957b952d6SSatish Balay 630d64ed03dSBarry Smith PetscFunctionBegin; 63157b952d6SSatish Balay /* wait on receives */ 63257b952d6SSatish Balay while (count) { 633ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 63457b952d6SSatish Balay /* unpack receives into our local space */ 63557b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 636ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 63757b952d6SSatish Balay n = n/3; 63857b952d6SSatish Balay for ( i=0; i<n; i++ ) { 63957b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 64057b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 64157b952d6SSatish Balay val = values[3*i+2]; 64257b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 64357b952d6SSatish Balay col -= baij->cstart*bs; 6446fd7127cSSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 645d64ed03dSBarry Smith } else { 64657b952d6SSatish Balay if (mat->was_assembled) { 647905e6a2fSBarry Smith if (!baij->colmap) { 648905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 649905e6a2fSBarry Smith } 650a5eb4965SSatish Balay col = (baij->colmap[col/bs]) - 1 + col%bs; 65157b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 65257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 65357b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 65457b952d6SSatish Balay } 65557b952d6SSatish Balay } 6566fd7127cSSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 65757b952d6SSatish Balay } 65857b952d6SSatish Balay } 65957b952d6SSatish Balay count--; 66057b952d6SSatish Balay } 66157b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 66257b952d6SSatish Balay 66357b952d6SSatish Balay /* wait on sends */ 66457b952d6SSatish Balay if (baij->nsends) { 665d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 666ca161407SBarry Smith ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 66757b952d6SSatish Balay PetscFree(send_status); 66857b952d6SSatish Balay } 66957b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 67057b952d6SSatish Balay 67157b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 67257b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 67357b952d6SSatish Balay 67457b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 67557b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 676ca161407SBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 67757b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 67857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 67957b952d6SSatish Balay } 68057b952d6SSatish Balay 6816d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 68257b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 68357b952d6SSatish Balay } 68457b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 68557b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 68657b952d6SSatish Balay 687596b8d2eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 688bd7f49f5SSatish Balay if (flg) { 689*a07cd24cSSatish Balay double fact = 1.39; 690*a07cd24cSSatish Balay /* for ( fact=1.2; fact<2.0; fact +=0.05) */ CreateHashTable(mat,fact); 691bd7f49f5SSatish Balay } 69257b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 6933a40ed3dSBarry Smith PetscFunctionReturn(0); 69457b952d6SSatish Balay } 69557b952d6SSatish Balay 6965615d1e5SSatish Balay #undef __FUNC__ 6975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary" 69857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 69957b952d6SSatish Balay { 70057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 70157b952d6SSatish Balay int ierr; 70257b952d6SSatish Balay 703d64ed03dSBarry Smith PetscFunctionBegin; 70457b952d6SSatish Balay if (baij->size == 1) { 70557b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 706a8c6a408SBarry Smith } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 7073a40ed3dSBarry Smith PetscFunctionReturn(0); 70857b952d6SSatish Balay } 70957b952d6SSatish Balay 7105615d1e5SSatish Balay #undef __FUNC__ 7115615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 71257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 71357b952d6SSatish Balay { 71457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 715cee3aa6bSSatish Balay int ierr, format,rank,bs = baij->bs; 71657b952d6SSatish Balay FILE *fd; 71757b952d6SSatish Balay ViewerType vtype; 71857b952d6SSatish Balay 719d64ed03dSBarry Smith PetscFunctionBegin; 72057b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 72157b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 72257b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 723639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 7244e220ebcSLois Curfman McInnes MatInfo info; 72557b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 72657b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 7274e220ebcSLois Curfman McInnes ierr = MatGetInfo(mat,MAT_LOCAL,&info); 72857b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 72957b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 7304e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 7314e220ebcSLois Curfman McInnes baij->bs,(int)info.memory); 7324e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 7334e220ebcSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 7344e220ebcSLois Curfman McInnes ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 7354e220ebcSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 73657b952d6SSatish Balay fflush(fd); 73757b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 73857b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 7393a40ed3dSBarry Smith PetscFunctionReturn(0); 740d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 741bcc3fcf6SBarry Smith PetscPrintf(mat->comm," block size is %d\n",bs); 7423a40ed3dSBarry Smith PetscFunctionReturn(0); 74357b952d6SSatish Balay } 74457b952d6SSatish Balay } 74557b952d6SSatish Balay 74657b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 74757b952d6SSatish Balay Draw draw; 74857b952d6SSatish Balay PetscTruth isnull; 74957b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 7503a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 75157b952d6SSatish Balay } 75257b952d6SSatish Balay 75357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 75457b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 75557b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 75657b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 75757b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 75857b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 75957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 76057b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 76157b952d6SSatish Balay fflush(fd); 76257b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 763d64ed03dSBarry Smith } else { 76457b952d6SSatish Balay int size = baij->size; 76557b952d6SSatish Balay rank = baij->rank; 76657b952d6SSatish Balay if (size == 1) { 76757b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 768d64ed03dSBarry Smith } else { 76957b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 77057b952d6SSatish Balay Mat A; 77157b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 77257b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 77357b952d6SSatish Balay int mbs=baij->mbs; 77457b952d6SSatish Balay Scalar *a; 77557b952d6SSatish Balay 77657b952d6SSatish Balay if (!rank) { 77755843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 778d64ed03dSBarry Smith } else { 77955843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 78057b952d6SSatish Balay } 78157b952d6SSatish Balay PLogObjectParent(mat,A); 78257b952d6SSatish Balay 78357b952d6SSatish Balay /* copy over the A part */ 78457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 78557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 78657b952d6SSatish Balay row = baij->rstart; 78757b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 78857b952d6SSatish Balay 78957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 79057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 79157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 79257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 79357b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 79457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 795cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 796cee3aa6bSSatish Balay col++; a += bs; 79757b952d6SSatish Balay } 79857b952d6SSatish Balay } 79957b952d6SSatish Balay } 80057b952d6SSatish Balay /* copy over the B part */ 80157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 80257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 80357b952d6SSatish Balay row = baij->rstart*bs; 80457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 80557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 80657b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 80757b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 80857b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 80957b952d6SSatish Balay for (k=0; k<bs; k++ ) { 810cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 811cee3aa6bSSatish Balay col++; a += bs; 81257b952d6SSatish Balay } 81357b952d6SSatish Balay } 81457b952d6SSatish Balay } 81557b952d6SSatish Balay PetscFree(rvals); 8166d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 8176d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 81855843e3eSBarry Smith /* 81955843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 82055843e3eSBarry Smith synchronized across all processors that share the Draw object 82155843e3eSBarry Smith */ 82255843e3eSBarry Smith if (!rank || vtype == DRAW_VIEWER) { 82357b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 82457b952d6SSatish Balay } 82557b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 82657b952d6SSatish Balay } 82757b952d6SSatish Balay } 8283a40ed3dSBarry Smith PetscFunctionReturn(0); 82957b952d6SSatish Balay } 83057b952d6SSatish Balay 83157b952d6SSatish Balay 83257b952d6SSatish Balay 8335615d1e5SSatish Balay #undef __FUNC__ 8345615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ" 835ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 83657b952d6SSatish Balay { 83757b952d6SSatish Balay Mat mat = (Mat) obj; 83857b952d6SSatish Balay int ierr; 83957b952d6SSatish Balay ViewerType vtype; 84057b952d6SSatish Balay 841d64ed03dSBarry Smith PetscFunctionBegin; 84257b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 84357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 84457b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 84557b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 8463a40ed3dSBarry Smith } else if (vtype == BINARY_FILE_VIEWER) { 8473a40ed3dSBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 84857b952d6SSatish Balay } 8493a40ed3dSBarry Smith PetscFunctionReturn(0); 85057b952d6SSatish Balay } 85157b952d6SSatish Balay 8525615d1e5SSatish Balay #undef __FUNC__ 8535615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ" 854ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj) 85579bdfe76SSatish Balay { 85679bdfe76SSatish Balay Mat mat = (Mat) obj; 85779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 85879bdfe76SSatish Balay int ierr; 85979bdfe76SSatish Balay 860d64ed03dSBarry Smith PetscFunctionBegin; 8613a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 86279bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 86379bdfe76SSatish Balay #endif 86479bdfe76SSatish Balay 86583e2fdc7SBarry Smith ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 86679bdfe76SSatish Balay PetscFree(baij->rowners); 86779bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 86879bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 86979bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 87079bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 87179bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 87279bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 87379bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 87430793edcSSatish Balay if (baij->barray) PetscFree(baij->barray); 87579bdfe76SSatish Balay PetscFree(baij); 87679bdfe76SSatish Balay PLogObjectDestroy(mat); 87779bdfe76SSatish Balay PetscHeaderDestroy(mat); 8783a40ed3dSBarry Smith PetscFunctionReturn(0); 87979bdfe76SSatish Balay } 88079bdfe76SSatish Balay 8815615d1e5SSatish Balay #undef __FUNC__ 8825615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ" 883ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 884cee3aa6bSSatish Balay { 885cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 88647b4a8eaSLois Curfman McInnes int ierr, nt; 887cee3aa6bSSatish Balay 888d64ed03dSBarry Smith PetscFunctionBegin; 889c16cb8f2SBarry Smith VecGetLocalSize_Fast(xx,nt); 89047b4a8eaSLois Curfman McInnes if (nt != a->n) { 891a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 89247b4a8eaSLois Curfman McInnes } 893c16cb8f2SBarry Smith VecGetLocalSize_Fast(yy,nt); 89447b4a8eaSLois Curfman McInnes if (nt != a->m) { 895a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 89647b4a8eaSLois Curfman McInnes } 89743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 898cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 89943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 900cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 90143a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 9023a40ed3dSBarry Smith PetscFunctionReturn(0); 903cee3aa6bSSatish Balay } 904cee3aa6bSSatish Balay 9055615d1e5SSatish Balay #undef __FUNC__ 9065615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ" 907ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 908cee3aa6bSSatish Balay { 909cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 910cee3aa6bSSatish Balay int ierr; 911d64ed03dSBarry Smith 912d64ed03dSBarry Smith PetscFunctionBegin; 91343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 914cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 91543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 916cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 9173a40ed3dSBarry Smith PetscFunctionReturn(0); 918cee3aa6bSSatish Balay } 919cee3aa6bSSatish Balay 9205615d1e5SSatish Balay #undef __FUNC__ 9215615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ" 922ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 923cee3aa6bSSatish Balay { 924cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 925cee3aa6bSSatish Balay int ierr; 926cee3aa6bSSatish Balay 927d64ed03dSBarry Smith PetscFunctionBegin; 928cee3aa6bSSatish Balay /* do nondiagonal part */ 929cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 930cee3aa6bSSatish Balay /* send it on its way */ 931537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 932cee3aa6bSSatish Balay /* do local part */ 933cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 934cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 935cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 936cee3aa6bSSatish Balay /* but is not perhaps always true. */ 937639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 9383a40ed3dSBarry Smith PetscFunctionReturn(0); 939cee3aa6bSSatish Balay } 940cee3aa6bSSatish Balay 9415615d1e5SSatish Balay #undef __FUNC__ 9425615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 943ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 944cee3aa6bSSatish Balay { 945cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 946cee3aa6bSSatish Balay int ierr; 947cee3aa6bSSatish Balay 948d64ed03dSBarry Smith PetscFunctionBegin; 949cee3aa6bSSatish Balay /* do nondiagonal part */ 950cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 951cee3aa6bSSatish Balay /* send it on its way */ 952537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 953cee3aa6bSSatish Balay /* do local part */ 954cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 955cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 956cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 957cee3aa6bSSatish Balay /* but is not perhaps always true. */ 958537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 9593a40ed3dSBarry Smith PetscFunctionReturn(0); 960cee3aa6bSSatish Balay } 961cee3aa6bSSatish Balay 962cee3aa6bSSatish Balay /* 963cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 964cee3aa6bSSatish Balay diagonal block 965cee3aa6bSSatish Balay */ 9665615d1e5SSatish Balay #undef __FUNC__ 9675615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 968ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 969cee3aa6bSSatish Balay { 970cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 9713a40ed3dSBarry Smith int ierr; 972d64ed03dSBarry Smith 973d64ed03dSBarry Smith PetscFunctionBegin; 974a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 9753a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 9763a40ed3dSBarry Smith PetscFunctionReturn(0); 977cee3aa6bSSatish Balay } 978cee3aa6bSSatish Balay 9795615d1e5SSatish Balay #undef __FUNC__ 9805615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ" 981ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 982cee3aa6bSSatish Balay { 983cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 984cee3aa6bSSatish Balay int ierr; 985d64ed03dSBarry Smith 986d64ed03dSBarry Smith PetscFunctionBegin; 987cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 988cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 9893a40ed3dSBarry Smith PetscFunctionReturn(0); 990cee3aa6bSSatish Balay } 991026e39d0SSatish Balay 9925615d1e5SSatish Balay #undef __FUNC__ 9935615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ" 994ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 99557b952d6SSatish Balay { 99657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 997d64ed03dSBarry Smith 998d64ed03dSBarry Smith PetscFunctionBegin; 999bd7f49f5SSatish Balay if (m) *m = mat->M; 1000bd7f49f5SSatish Balay if (n) *n = mat->N; 10013a40ed3dSBarry Smith PetscFunctionReturn(0); 100257b952d6SSatish Balay } 100357b952d6SSatish Balay 10045615d1e5SSatish Balay #undef __FUNC__ 10055615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1006ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 100757b952d6SSatish Balay { 100857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1009d64ed03dSBarry Smith 1010d64ed03dSBarry Smith PetscFunctionBegin; 101157b952d6SSatish Balay *m = mat->m; *n = mat->N; 10123a40ed3dSBarry Smith PetscFunctionReturn(0); 101357b952d6SSatish Balay } 101457b952d6SSatish Balay 10155615d1e5SSatish Balay #undef __FUNC__ 10165615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1017ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 101857b952d6SSatish Balay { 101957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1020d64ed03dSBarry Smith 1021d64ed03dSBarry Smith PetscFunctionBegin; 102257b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 10233a40ed3dSBarry Smith PetscFunctionReturn(0); 102457b952d6SSatish Balay } 102557b952d6SSatish Balay 1026acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1027acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1028acdf5bf4SSatish Balay 10295615d1e5SSatish Balay #undef __FUNC__ 10305615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ" 1031acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1032acdf5bf4SSatish Balay { 1033acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1034acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1035acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1036d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1037d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 1038acdf5bf4SSatish Balay 1039d64ed03dSBarry Smith PetscFunctionBegin; 1040a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1041acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1042acdf5bf4SSatish Balay 1043acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1044acdf5bf4SSatish Balay /* 1045acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1046acdf5bf4SSatish Balay */ 1047acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1048bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1049bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 1050acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1051acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1052acdf5bf4SSatish Balay } 1053acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1054acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 1055acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1056acdf5bf4SSatish Balay } 1057acdf5bf4SSatish Balay 1058a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1059d9d09a02SSatish Balay lrow = row - brstart; 1060acdf5bf4SSatish Balay 1061acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1062acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1063acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1064acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1065acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1066acdf5bf4SSatish Balay nztot = nzA + nzB; 1067acdf5bf4SSatish Balay 1068acdf5bf4SSatish Balay cmap = mat->garray; 1069acdf5bf4SSatish Balay if (v || idx) { 1070acdf5bf4SSatish Balay if (nztot) { 1071acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1072acdf5bf4SSatish Balay int imark = -1; 1073acdf5bf4SSatish Balay if (v) { 1074acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1075acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1076d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1077acdf5bf4SSatish Balay else break; 1078acdf5bf4SSatish Balay } 1079acdf5bf4SSatish Balay imark = i; 1080acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1081acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1082acdf5bf4SSatish Balay } 1083acdf5bf4SSatish Balay if (idx) { 1084acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1085acdf5bf4SSatish Balay if (imark > -1) { 1086acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 1087bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1088acdf5bf4SSatish Balay } 1089acdf5bf4SSatish Balay } else { 1090acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 1091d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1092d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1093acdf5bf4SSatish Balay else break; 1094acdf5bf4SSatish Balay } 1095acdf5bf4SSatish Balay imark = i; 1096acdf5bf4SSatish Balay } 1097d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1098d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1099acdf5bf4SSatish Balay } 1100d64ed03dSBarry Smith } else { 1101d212a18eSSatish Balay if (idx) *idx = 0; 1102d212a18eSSatish Balay if (v) *v = 0; 1103d212a18eSSatish Balay } 1104acdf5bf4SSatish Balay } 1105acdf5bf4SSatish Balay *nz = nztot; 1106acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1107acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 11083a40ed3dSBarry Smith PetscFunctionReturn(0); 1109acdf5bf4SSatish Balay } 1110acdf5bf4SSatish Balay 11115615d1e5SSatish Balay #undef __FUNC__ 11125615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1113acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1114acdf5bf4SSatish Balay { 1115acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1116d64ed03dSBarry Smith 1117d64ed03dSBarry Smith PetscFunctionBegin; 1118acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1119a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1120acdf5bf4SSatish Balay } 1121acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 11223a40ed3dSBarry Smith PetscFunctionReturn(0); 1123acdf5bf4SSatish Balay } 1124acdf5bf4SSatish Balay 11255615d1e5SSatish Balay #undef __FUNC__ 11265615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1127ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 11285a838052SSatish Balay { 11295a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1130d64ed03dSBarry Smith 1131d64ed03dSBarry Smith PetscFunctionBegin; 11325a838052SSatish Balay *bs = baij->bs; 11333a40ed3dSBarry Smith PetscFunctionReturn(0); 11345a838052SSatish Balay } 11355a838052SSatish Balay 11365615d1e5SSatish Balay #undef __FUNC__ 11375615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1138ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 113958667388SSatish Balay { 114058667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 114158667388SSatish Balay int ierr; 1142d64ed03dSBarry Smith 1143d64ed03dSBarry Smith PetscFunctionBegin; 114458667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 114558667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 11463a40ed3dSBarry Smith PetscFunctionReturn(0); 114758667388SSatish Balay } 11480ac07820SSatish Balay 11495615d1e5SSatish Balay #undef __FUNC__ 11505615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ" 1151ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 11520ac07820SSatish Balay { 11534e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 11544e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 11557d57db60SLois Curfman McInnes int ierr; 11567d57db60SLois Curfman McInnes double isend[5], irecv[5]; 11570ac07820SSatish Balay 1158d64ed03dSBarry Smith PetscFunctionBegin; 11594e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 11604e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 11614e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 11624e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 11634e220ebcSLois Curfman McInnes isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 11640ac07820SSatish Balay if (flag == MAT_LOCAL) { 11654e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 11664e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 11674e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 11684e220ebcSLois Curfman McInnes info->memory = isend[3]; 11694e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 11700ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1171ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 11724e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11734e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11744e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11754e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11764e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11770ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1178ca161407SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 11794e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 11804e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 11814e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 11824e220ebcSLois Curfman McInnes info->memory = irecv[3]; 11834e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 11840ac07820SSatish Balay } 11855a5d4f66SBarry Smith info->rows_global = (double)a->M; 11865a5d4f66SBarry Smith info->columns_global = (double)a->N; 11875a5d4f66SBarry Smith info->rows_local = (double)a->m; 11885a5d4f66SBarry Smith info->columns_local = (double)a->N; 11894e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 11904e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 11914e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 11923a40ed3dSBarry Smith PetscFunctionReturn(0); 11930ac07820SSatish Balay } 11940ac07820SSatish Balay 11955615d1e5SSatish Balay #undef __FUNC__ 11965615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ" 1197ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 119858667388SSatish Balay { 119958667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 120058667388SSatish Balay 1201d64ed03dSBarry Smith PetscFunctionBegin; 120258667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 120358667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 12046da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1205c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 120696854ed6SLois Curfman McInnes op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1207c2653b3dSLois Curfman McInnes op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1208b1fbbac0SLois Curfman McInnes MatSetOption(a->A,op); 1209b1fbbac0SLois Curfman McInnes MatSetOption(a->B,op); 1210b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 1211aeafbbfcSLois Curfman McInnes a->roworiented = 1; 121258667388SSatish Balay MatSetOption(a->A,op); 121358667388SSatish Balay MatSetOption(a->B,op); 1214b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 12156da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 121658667388SSatish Balay op == MAT_SYMMETRIC || 121758667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 121858667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 121958667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 122058667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 122158667388SSatish Balay a->roworiented = 0; 122258667388SSatish Balay MatSetOption(a->A,op); 122358667388SSatish Balay MatSetOption(a->B,op); 12242b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 122590f02eecSBarry Smith a->donotstash = 1; 1226d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1227d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1228d64ed03dSBarry Smith } else { 1229d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1230d64ed03dSBarry Smith } 12313a40ed3dSBarry Smith PetscFunctionReturn(0); 123258667388SSatish Balay } 123358667388SSatish Balay 12345615d1e5SSatish Balay #undef __FUNC__ 12355615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ(" 1236ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 12370ac07820SSatish Balay { 12380ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 12390ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 12400ac07820SSatish Balay Mat B; 12410ac07820SSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 12420ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 12430ac07820SSatish Balay Scalar *a; 12440ac07820SSatish Balay 1245d64ed03dSBarry Smith PetscFunctionBegin; 1246a8c6a408SBarry Smith if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 12470ac07820SSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 12480ac07820SSatish Balay CHKERRQ(ierr); 12490ac07820SSatish Balay 12500ac07820SSatish Balay /* copy over the A part */ 12510ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 12520ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12530ac07820SSatish Balay row = baij->rstart; 12540ac07820SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 12550ac07820SSatish Balay 12560ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12570ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12580ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12590ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12600ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 12610ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12620ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12630ac07820SSatish Balay col++; a += bs; 12640ac07820SSatish Balay } 12650ac07820SSatish Balay } 12660ac07820SSatish Balay } 12670ac07820SSatish Balay /* copy over the B part */ 12680ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 12690ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 12700ac07820SSatish Balay row = baij->rstart*bs; 12710ac07820SSatish Balay for ( i=0; i<mbs; i++ ) { 12720ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 12730ac07820SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 12740ac07820SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 12750ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 12760ac07820SSatish Balay for (k=0; k<bs; k++ ) { 12770ac07820SSatish Balay ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 12780ac07820SSatish Balay col++; a += bs; 12790ac07820SSatish Balay } 12800ac07820SSatish Balay } 12810ac07820SSatish Balay } 12820ac07820SSatish Balay PetscFree(rvals); 12830ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12840ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12850ac07820SSatish Balay 12860ac07820SSatish Balay if (matout != PETSC_NULL) { 12870ac07820SSatish Balay *matout = B; 12880ac07820SSatish Balay } else { 12890ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 12900ac07820SSatish Balay PetscFree(baij->rowners); 12910ac07820SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 12920ac07820SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 12930ac07820SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 12940ac07820SSatish Balay if (baij->garray) PetscFree(baij->garray); 12950ac07820SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 12960ac07820SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 12970ac07820SSatish Balay PetscFree(baij); 1298f09e8eb9SSatish Balay PetscMemcpy(A,B,sizeof(struct _p_Mat)); 12990ac07820SSatish Balay PetscHeaderDestroy(B); 13000ac07820SSatish Balay } 13013a40ed3dSBarry Smith PetscFunctionReturn(0); 13020ac07820SSatish Balay } 13030e95ebc0SSatish Balay 13045615d1e5SSatish Balay #undef __FUNC__ 13055615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 13060e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 13070e95ebc0SSatish Balay { 13080e95ebc0SSatish Balay Mat a = ((Mat_MPIBAIJ *) A->data)->A; 13090e95ebc0SSatish Balay Mat b = ((Mat_MPIBAIJ *) A->data)->B; 13100e95ebc0SSatish Balay int ierr,s1,s2,s3; 13110e95ebc0SSatish Balay 1312d64ed03dSBarry Smith PetscFunctionBegin; 13130e95ebc0SSatish Balay if (ll) { 13140e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 13150e95ebc0SSatish Balay ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1316a8c6a408SBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 13170e95ebc0SSatish Balay ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 13180e95ebc0SSatish Balay ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 13190e95ebc0SSatish Balay } 1320a8c6a408SBarry Smith if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 13213a40ed3dSBarry Smith PetscFunctionReturn(0); 13220e95ebc0SSatish Balay } 13230e95ebc0SSatish Balay 13245615d1e5SSatish Balay #undef __FUNC__ 13255615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ" 1326ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 13270ac07820SSatish Balay { 13280ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 13290ac07820SSatish Balay int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1330*a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 13310ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 13320ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1333*a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 13340ac07820SSatish Balay MPI_Comm comm = A->comm; 13350ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 13360ac07820SSatish Balay MPI_Status recv_status,*send_status; 13370ac07820SSatish Balay IS istmp; 13380ac07820SSatish Balay 1339d64ed03dSBarry Smith PetscFunctionBegin; 13400ac07820SSatish Balay ierr = ISGetSize(is,&N); CHKERRQ(ierr); 13410ac07820SSatish Balay ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 13420ac07820SSatish Balay 13430ac07820SSatish Balay /* first count number of contributors to each processor */ 13440ac07820SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 13450ac07820SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 13460ac07820SSatish Balay owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 13470ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13480ac07820SSatish Balay idx = rows[i]; 13490ac07820SSatish Balay found = 0; 13500ac07820SSatish Balay for ( j=0; j<size; j++ ) { 13510ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 13520ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 13530ac07820SSatish Balay } 13540ac07820SSatish Balay } 1355a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 13560ac07820SSatish Balay } 13570ac07820SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 13580ac07820SSatish Balay 13590ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 13600ac07820SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1361ca161407SBarry Smith ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 13620ac07820SSatish Balay nrecvs = work[rank]; 1363ca161407SBarry Smith ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 13640ac07820SSatish Balay nmax = work[rank]; 13650ac07820SSatish Balay PetscFree(work); 13660ac07820SSatish Balay 13670ac07820SSatish Balay /* post receives: */ 1368d64ed03dSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1369d64ed03dSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 13700ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 1371ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 13720ac07820SSatish Balay } 13730ac07820SSatish Balay 13740ac07820SSatish Balay /* do sends: 13750ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 13760ac07820SSatish Balay the ith processor 13770ac07820SSatish Balay */ 13780ac07820SSatish Balay svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1379ca161407SBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 13800ac07820SSatish Balay starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 13810ac07820SSatish Balay starts[0] = 0; 13820ac07820SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13830ac07820SSatish Balay for ( i=0; i<N; i++ ) { 13840ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 13850ac07820SSatish Balay } 13860ac07820SSatish Balay ISRestoreIndices(is,&rows); 13870ac07820SSatish Balay 13880ac07820SSatish Balay starts[0] = 0; 13890ac07820SSatish Balay for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 13900ac07820SSatish Balay count = 0; 13910ac07820SSatish Balay for ( i=0; i<size; i++ ) { 13920ac07820SSatish Balay if (procs[i]) { 1393ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 13940ac07820SSatish Balay } 13950ac07820SSatish Balay } 13960ac07820SSatish Balay PetscFree(starts); 13970ac07820SSatish Balay 13980ac07820SSatish Balay base = owners[rank]*bs; 13990ac07820SSatish Balay 14000ac07820SSatish Balay /* wait on receives */ 14010ac07820SSatish Balay lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 14020ac07820SSatish Balay source = lens + nrecvs; 14030ac07820SSatish Balay count = nrecvs; slen = 0; 14040ac07820SSatish Balay while (count) { 1405ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 14060ac07820SSatish Balay /* unpack receives into our local space */ 1407ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 14080ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 14090ac07820SSatish Balay lens[imdex] = n; 14100ac07820SSatish Balay slen += n; 14110ac07820SSatish Balay count--; 14120ac07820SSatish Balay } 14130ac07820SSatish Balay PetscFree(recv_waits); 14140ac07820SSatish Balay 14150ac07820SSatish Balay /* move the data into the send scatter */ 14160ac07820SSatish Balay lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 14170ac07820SSatish Balay count = 0; 14180ac07820SSatish Balay for ( i=0; i<nrecvs; i++ ) { 14190ac07820SSatish Balay values = rvalues + i*nmax; 14200ac07820SSatish Balay for ( j=0; j<lens[i]; j++ ) { 14210ac07820SSatish Balay lrows[count++] = values[j] - base; 14220ac07820SSatish Balay } 14230ac07820SSatish Balay } 14240ac07820SSatish Balay PetscFree(rvalues); PetscFree(lens); 14250ac07820SSatish Balay PetscFree(owner); PetscFree(nprocs); 14260ac07820SSatish Balay 14270ac07820SSatish Balay /* actually zap the local rows */ 1428029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 14290ac07820SSatish Balay PLogObjectParent(A,istmp); 1430*a07cd24cSSatish Balay 1431*a07cd24cSSatish Balay ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 14320ac07820SSatish Balay ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 14330ac07820SSatish Balay ierr = ISDestroy(istmp); CHKERRQ(ierr); 14340ac07820SSatish Balay 1435*a07cd24cSSatish Balay if (diag) { 1436*a07cd24cSSatish Balay for ( i = 0; i < slen; i++ ) { 1437*a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1438*a07cd24cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1439*a07cd24cSSatish Balay } 1440*a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1441*a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1442*a07cd24cSSatish Balay } 1443*a07cd24cSSatish Balay PetscFree(lrows); 1444*a07cd24cSSatish Balay 14450ac07820SSatish Balay /* wait on sends */ 14460ac07820SSatish Balay if (nsends) { 1447d64ed03dSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1448ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 14490ac07820SSatish Balay PetscFree(send_status); 14500ac07820SSatish Balay } 14510ac07820SSatish Balay PetscFree(send_waits); PetscFree(svalues); 14520ac07820SSatish Balay 14533a40ed3dSBarry Smith PetscFunctionReturn(0); 14540ac07820SSatish Balay } 1455ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 14565615d1e5SSatish Balay #undef __FUNC__ 14575615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1458ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1459ba4ca20aSSatish Balay { 1460ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 14613a40ed3dSBarry Smith int ierr; 1462ba4ca20aSSatish Balay 1463d64ed03dSBarry Smith PetscFunctionBegin; 14643a40ed3dSBarry Smith if (!a->rank) { 14653a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 14663a40ed3dSBarry Smith } 14673a40ed3dSBarry Smith PetscFunctionReturn(0); 1468ba4ca20aSSatish Balay } 14690ac07820SSatish Balay 14705615d1e5SSatish Balay #undef __FUNC__ 14715615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1472ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1473bb5a7306SBarry Smith { 1474bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1475bb5a7306SBarry Smith int ierr; 1476d64ed03dSBarry Smith 1477d64ed03dSBarry Smith PetscFunctionBegin; 1478bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 14793a40ed3dSBarry Smith PetscFunctionReturn(0); 1480bb5a7306SBarry Smith } 1481bb5a7306SBarry Smith 14820ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 14830ac07820SSatish Balay 148479bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 148579bdfe76SSatish Balay static struct _MatOps MatOps = { 1486bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 14874c50302cSBarry Smith MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 14884c50302cSBarry Smith 0,0,0,0, 14890ac07820SSatish Balay 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 14900e95ebc0SSatish Balay 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 149158667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 14924c50302cSBarry Smith MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 14934c50302cSBarry Smith 0,0,0,MatGetSize_MPIBAIJ, 14944c50302cSBarry Smith MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 149594a9d846SBarry Smith 0,0,MatConvertSameType_MPIBAIJ,0,0, 1496d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 1497ba4ca20aSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1498bb5a7306SBarry Smith MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1499ab26458aSBarry Smith 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 150079bdfe76SSatish Balay 150179bdfe76SSatish Balay 15025615d1e5SSatish Balay #undef __FUNC__ 15035615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ" 150479bdfe76SSatish Balay /*@C 150579bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 150679bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 150779bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 150879bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 150979bdfe76SSatish Balay performance can be increased by more than a factor of 50. 151079bdfe76SSatish Balay 151179bdfe76SSatish Balay Input Parameters: 151279bdfe76SSatish Balay . comm - MPI communicator 151379bdfe76SSatish Balay . bs - size of blockk 151479bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 151592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 151692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 151792e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 151892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 151992e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 152079bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 152192e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 152279bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 152379bdfe76SSatish Balay submatrix (same for all local rows) 152492e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 152592e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 152692e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 152792e8d321SLois Curfman McInnes it is zero. 152892e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 152979bdfe76SSatish Balay submatrix (same for all local rows). 153092e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 153192e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 153292e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 153379bdfe76SSatish Balay 153479bdfe76SSatish Balay Output Parameter: 153579bdfe76SSatish Balay . A - the matrix 153679bdfe76SSatish Balay 153779bdfe76SSatish Balay Notes: 153879bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 153979bdfe76SSatish Balay (possibly both). 154079bdfe76SSatish Balay 154179bdfe76SSatish Balay Storage Information: 154279bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 154379bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 154479bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 154579bdfe76SSatish Balay local matrix (a rectangular submatrix). 154679bdfe76SSatish Balay 154779bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 154879bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 154979bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 155079bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 155179bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 155279bdfe76SSatish Balay 155379bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 155479bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 155579bdfe76SSatish Balay 155679bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 155779bdfe76SSatish Balay $ ------------------- 155879bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 155979bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 156079bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 156179bdfe76SSatish Balay $ ------------------- 156279bdfe76SSatish Balay $ 156379bdfe76SSatish Balay 156479bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 156579bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 156679bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 156757b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 156879bdfe76SSatish Balay 1569d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1570d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 157179bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 157292e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 157392e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 15746da5968aSLois Curfman McInnes matrices. 157579bdfe76SSatish Balay 157692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 157779bdfe76SSatish Balay 157879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 157979bdfe76SSatish Balay @*/ 158079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 158179bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 158279bdfe76SSatish Balay { 158379bdfe76SSatish Balay Mat B; 158479bdfe76SSatish Balay Mat_MPIBAIJ *b; 15854c50302cSBarry Smith int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 158679bdfe76SSatish Balay 1587d64ed03dSBarry Smith PetscFunctionBegin; 1588a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 15893914022bSBarry Smith 15903914022bSBarry Smith MPI_Comm_size(comm,&size); 15913914022bSBarry Smith if (size == 1) { 15923914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 15933914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 15943914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 15953a40ed3dSBarry Smith PetscFunctionReturn(0); 15963914022bSBarry Smith } 15973914022bSBarry Smith 159879bdfe76SSatish Balay *A = 0; 1599d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 160079bdfe76SSatish Balay PLogObjectCreate(B); 160179bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 160279bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 160379bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 16044c50302cSBarry Smith 160579bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 160679bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 160790f02eecSBarry Smith B->mapping = 0; 160879bdfe76SSatish Balay B->factor = 0; 160979bdfe76SSatish Balay B->assembled = PETSC_FALSE; 161079bdfe76SSatish Balay 1611e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 161279bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 161379bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 161479bdfe76SSatish Balay 1615d64ed03dSBarry Smith if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1616a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1617d64ed03dSBarry Smith } 1618a8c6a408SBarry Smith if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1619a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1620a8c6a408SBarry Smith } 1621a8c6a408SBarry Smith if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1622a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1623a8c6a408SBarry Smith } 1624cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1625cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 162679bdfe76SSatish Balay 162779bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 162879bdfe76SSatish Balay work[0] = m; work[1] = n; 162979bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 1630ca161407SBarry Smith ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 163179bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 163279bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 163379bdfe76SSatish Balay } 163479bdfe76SSatish Balay if (m == PETSC_DECIDE) { 163579bdfe76SSatish Balay Mbs = M/bs; 1636a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 163779bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 163879bdfe76SSatish Balay m = mbs*bs; 163979bdfe76SSatish Balay } 164079bdfe76SSatish Balay if (n == PETSC_DECIDE) { 164179bdfe76SSatish Balay Nbs = N/bs; 1642a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 164379bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 164479bdfe76SSatish Balay n = nbs*bs; 164579bdfe76SSatish Balay } 1646a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 1647a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1648a8c6a408SBarry Smith } 164979bdfe76SSatish Balay 165079bdfe76SSatish Balay b->m = m; B->m = m; 165179bdfe76SSatish Balay b->n = n; B->n = n; 165279bdfe76SSatish Balay b->N = N; B->N = N; 165379bdfe76SSatish Balay b->M = M; B->M = M; 165479bdfe76SSatish Balay b->bs = bs; 165579bdfe76SSatish Balay b->bs2 = bs*bs; 165679bdfe76SSatish Balay b->mbs = mbs; 165779bdfe76SSatish Balay b->nbs = nbs; 165879bdfe76SSatish Balay b->Mbs = Mbs; 165979bdfe76SSatish Balay b->Nbs = Nbs; 166079bdfe76SSatish Balay 166179bdfe76SSatish Balay /* build local table of row and column ownerships */ 166279bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1663f09e8eb9SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 16640ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 1665ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 166679bdfe76SSatish Balay b->rowners[0] = 0; 166779bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 166879bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 166979bdfe76SSatish Balay } 167079bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 167179bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 16724fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 16734fa0d573SSatish Balay b->rend_bs = b->rend * bs; 16744fa0d573SSatish Balay 1675ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 167679bdfe76SSatish Balay b->cowners[0] = 0; 167779bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 167879bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 167979bdfe76SSatish Balay } 168079bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 168179bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 16824fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 16834fa0d573SSatish Balay b->cend_bs = b->cend * bs; 168479bdfe76SSatish Balay 1685*a07cd24cSSatish Balay 168679bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 1687029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 168879bdfe76SSatish Balay PLogObjectParent(B,b->A); 168979bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 1690029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 169179bdfe76SSatish Balay PLogObjectParent(B,b->B); 169279bdfe76SSatish Balay 169379bdfe76SSatish Balay /* build cache for off array entries formed */ 169479bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 169590f02eecSBarry Smith b->donotstash = 0; 169679bdfe76SSatish Balay b->colmap = 0; 169779bdfe76SSatish Balay b->garray = 0; 169879bdfe76SSatish Balay b->roworiented = 1; 169979bdfe76SSatish Balay 170030793edcSSatish Balay /* stuff used in block assembly */ 170130793edcSSatish Balay b->barray = 0; 170230793edcSSatish Balay 170379bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 170479bdfe76SSatish Balay b->lvec = 0; 170579bdfe76SSatish Balay b->Mvctx = 0; 170679bdfe76SSatish Balay 170779bdfe76SSatish Balay /* stuff for MatGetRow() */ 170879bdfe76SSatish Balay b->rowindices = 0; 170979bdfe76SSatish Balay b->rowvalues = 0; 171079bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 171179bdfe76SSatish Balay 1712*a07cd24cSSatish Balay /* hash table stuff */ 1713*a07cd24cSSatish Balay b->ht = 0; 1714*a07cd24cSSatish Balay 171579bdfe76SSatish Balay *A = B; 17163a40ed3dSBarry Smith PetscFunctionReturn(0); 171779bdfe76SSatish Balay } 1718026e39d0SSatish Balay 17195615d1e5SSatish Balay #undef __FUNC__ 17205615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ" 17210ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 17220ac07820SSatish Balay { 17230ac07820SSatish Balay Mat mat; 17240ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 17250ac07820SSatish Balay int ierr, len=0, flg; 17260ac07820SSatish Balay 1727d64ed03dSBarry Smith PetscFunctionBegin; 17280ac07820SSatish Balay *newmat = 0; 1729d4bb536fSBarry Smith PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 17300ac07820SSatish Balay PLogObjectCreate(mat); 17310ac07820SSatish Balay mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 17320ac07820SSatish Balay PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 17330ac07820SSatish Balay mat->destroy = MatDestroy_MPIBAIJ; 17340ac07820SSatish Balay mat->view = MatView_MPIBAIJ; 17350ac07820SSatish Balay mat->factor = matin->factor; 17360ac07820SSatish Balay mat->assembled = PETSC_TRUE; 17370ac07820SSatish Balay 17380ac07820SSatish Balay a->m = mat->m = oldmat->m; 17390ac07820SSatish Balay a->n = mat->n = oldmat->n; 17400ac07820SSatish Balay a->M = mat->M = oldmat->M; 17410ac07820SSatish Balay a->N = mat->N = oldmat->N; 17420ac07820SSatish Balay 17430ac07820SSatish Balay a->bs = oldmat->bs; 17440ac07820SSatish Balay a->bs2 = oldmat->bs2; 17450ac07820SSatish Balay a->mbs = oldmat->mbs; 17460ac07820SSatish Balay a->nbs = oldmat->nbs; 17470ac07820SSatish Balay a->Mbs = oldmat->Mbs; 17480ac07820SSatish Balay a->Nbs = oldmat->Nbs; 17490ac07820SSatish Balay 17500ac07820SSatish Balay a->rstart = oldmat->rstart; 17510ac07820SSatish Balay a->rend = oldmat->rend; 17520ac07820SSatish Balay a->cstart = oldmat->cstart; 17530ac07820SSatish Balay a->cend = oldmat->cend; 17540ac07820SSatish Balay a->size = oldmat->size; 17550ac07820SSatish Balay a->rank = oldmat->rank; 1756e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 17570ac07820SSatish Balay a->rowvalues = 0; 17580ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 175930793edcSSatish Balay a->barray = 0; 17600ac07820SSatish Balay 17610ac07820SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1762f09e8eb9SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 17630ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 17640ac07820SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 17650ac07820SSatish Balay ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 17660ac07820SSatish Balay if (oldmat->colmap) { 17670ac07820SSatish Balay a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 17680ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 17690ac07820SSatish Balay PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 17700ac07820SSatish Balay } else a->colmap = 0; 17710ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 17720ac07820SSatish Balay a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 17730ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 17740ac07820SSatish Balay PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 17750ac07820SSatish Balay } else a->garray = 0; 17760ac07820SSatish Balay 17770ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 17780ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 17790ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 17800ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 17810ac07820SSatish Balay ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 17820ac07820SSatish Balay PLogObjectParent(mat,a->A); 17830ac07820SSatish Balay ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 17840ac07820SSatish Balay PLogObjectParent(mat,a->B); 17850ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 17860ac07820SSatish Balay if (flg) { 17870ac07820SSatish Balay ierr = MatPrintHelp(mat); CHKERRQ(ierr); 17880ac07820SSatish Balay } 17890ac07820SSatish Balay *newmat = mat; 17903a40ed3dSBarry Smith PetscFunctionReturn(0); 17910ac07820SSatish Balay } 179257b952d6SSatish Balay 179357b952d6SSatish Balay #include "sys.h" 179457b952d6SSatish Balay 17955615d1e5SSatish Balay #undef __FUNC__ 17965615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ" 179757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 179857b952d6SSatish Balay { 179957b952d6SSatish Balay Mat A; 180057b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 180157b952d6SSatish Balay Scalar *vals,*buf; 180257b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 180357b952d6SSatish Balay MPI_Status status; 1804cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 180557b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 180657b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 180757b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 180857b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 180957b952d6SSatish Balay 1810d64ed03dSBarry Smith PetscFunctionBegin; 181157b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 181257b952d6SSatish Balay bs2 = bs*bs; 181357b952d6SSatish Balay 181457b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 181557b952d6SSatish Balay if (!rank) { 181657b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1817e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1818a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1819d64ed03dSBarry Smith if (header[3] < 0) { 1820a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 1821d64ed03dSBarry Smith } 18226c5fab8fSBarry Smith } 1823d64ed03dSBarry Smith 1824ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 182557b952d6SSatish Balay M = header[1]; N = header[2]; 182657b952d6SSatish Balay 1827a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 182857b952d6SSatish Balay 182957b952d6SSatish Balay /* 183057b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 183157b952d6SSatish Balay divisible by the blocksize 183257b952d6SSatish Balay */ 183357b952d6SSatish Balay Mbs = M/bs; 183457b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 183557b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 183657b952d6SSatish Balay else Mbs++; 183757b952d6SSatish Balay if (extra_rows &&!rank) { 1838b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 183957b952d6SSatish Balay } 1840537820f0SBarry Smith 184157b952d6SSatish Balay /* determine ownership of all rows */ 184257b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 184357b952d6SSatish Balay m = mbs * bs; 1844cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1845cee3aa6bSSatish Balay browners = rowners + size + 1; 1846ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 184757b952d6SSatish Balay rowners[0] = 0; 1848cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1849cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 185057b952d6SSatish Balay rstart = rowners[rank]; 185157b952d6SSatish Balay rend = rowners[rank+1]; 185257b952d6SSatish Balay 185357b952d6SSatish Balay /* distribute row lengths to all processors */ 185457b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 185557b952d6SSatish Balay if (!rank) { 185657b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 1857e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 185857b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 185957b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1860cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1861ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 186257b952d6SSatish Balay PetscFree(sndcounts); 1863d64ed03dSBarry Smith } else { 1864ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 186557b952d6SSatish Balay } 186657b952d6SSatish Balay 186757b952d6SSatish Balay if (!rank) { 186857b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 186957b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 187057b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 187157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 187257b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 187357b952d6SSatish Balay procsnz[i] += rowlengths[j]; 187457b952d6SSatish Balay } 187557b952d6SSatish Balay } 187657b952d6SSatish Balay PetscFree(rowlengths); 187757b952d6SSatish Balay 187857b952d6SSatish Balay /* determine max buffer needed and allocate it */ 187957b952d6SSatish Balay maxnz = 0; 188057b952d6SSatish Balay for ( i=0; i<size; i++ ) { 188157b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 188257b952d6SSatish Balay } 188357b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 188457b952d6SSatish Balay 188557b952d6SSatish Balay /* read in my part of the matrix column indices */ 188657b952d6SSatish Balay nz = procsnz[0]; 188757b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 188857b952d6SSatish Balay mycols = ibuf; 1889cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 1890e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1891cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1892cee3aa6bSSatish Balay 189357b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 189457b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 189557b952d6SSatish Balay nz = procsnz[i]; 1896e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1897ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 189857b952d6SSatish Balay } 189957b952d6SSatish Balay /* read in the stuff for the last proc */ 190057b952d6SSatish Balay if ( size != 1 ) { 190157b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 1902e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 190357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1904ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 190557b952d6SSatish Balay } 190657b952d6SSatish Balay PetscFree(cols); 1907d64ed03dSBarry Smith } else { 190857b952d6SSatish Balay /* determine buffer space needed for message */ 190957b952d6SSatish Balay nz = 0; 191057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 191157b952d6SSatish Balay nz += locrowlens[i]; 191257b952d6SSatish Balay } 191357b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 191457b952d6SSatish Balay mycols = ibuf; 191557b952d6SSatish Balay /* receive message of column indices*/ 1916ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1917ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1918a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 191957b952d6SSatish Balay } 192057b952d6SSatish Balay 192157b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1922cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1923cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 192457b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1925cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 192657b952d6SSatish Balay masked1 = mask + Mbs; 192757b952d6SSatish Balay masked2 = masked1 + Mbs; 192857b952d6SSatish Balay rowcount = 0; nzcount = 0; 192957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 193057b952d6SSatish Balay dcount = 0; 193157b952d6SSatish Balay odcount = 0; 193257b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 193357b952d6SSatish Balay kmax = locrowlens[rowcount]; 193457b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 193557b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 193657b952d6SSatish Balay if (!mask[tmp]) { 193757b952d6SSatish Balay mask[tmp] = 1; 193857b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 193957b952d6SSatish Balay else masked1[dcount++] = tmp; 194057b952d6SSatish Balay } 194157b952d6SSatish Balay } 194257b952d6SSatish Balay rowcount++; 194357b952d6SSatish Balay } 1944cee3aa6bSSatish Balay 194557b952d6SSatish Balay dlens[i] = dcount; 194657b952d6SSatish Balay odlens[i] = odcount; 1947cee3aa6bSSatish Balay 194857b952d6SSatish Balay /* zero out the mask elements we set */ 194957b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 195057b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 195157b952d6SSatish Balay } 1952cee3aa6bSSatish Balay 195357b952d6SSatish Balay /* create our matrix */ 1954537820f0SBarry Smith ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 1955537820f0SBarry Smith CHKERRQ(ierr); 195657b952d6SSatish Balay A = *newmat; 19576d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 195857b952d6SSatish Balay 195957b952d6SSatish Balay if (!rank) { 196057b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 196157b952d6SSatish Balay /* read in my part of the matrix numerical values */ 196257b952d6SSatish Balay nz = procsnz[0]; 196357b952d6SSatish Balay vals = buf; 1964cee3aa6bSSatish Balay mycols = ibuf; 1965cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 1966e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1967cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 1968537820f0SBarry Smith 196957b952d6SSatish Balay /* insert into matrix */ 197057b952d6SSatish Balay jj = rstart*bs; 197157b952d6SSatish Balay for ( i=0; i<m; i++ ) { 197257b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 197357b952d6SSatish Balay mycols += locrowlens[i]; 197457b952d6SSatish Balay vals += locrowlens[i]; 197557b952d6SSatish Balay jj++; 197657b952d6SSatish Balay } 197757b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 197857b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 197957b952d6SSatish Balay nz = procsnz[i]; 198057b952d6SSatish Balay vals = buf; 1981e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1982ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 198357b952d6SSatish Balay } 198457b952d6SSatish Balay /* the last proc */ 198557b952d6SSatish Balay if ( size != 1 ){ 198657b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1987cee3aa6bSSatish Balay vals = buf; 1988e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 198957b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1990ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 199157b952d6SSatish Balay } 199257b952d6SSatish Balay PetscFree(procsnz); 1993d64ed03dSBarry Smith } else { 199457b952d6SSatish Balay /* receive numeric values */ 199557b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 199657b952d6SSatish Balay 199757b952d6SSatish Balay /* receive message of values*/ 199857b952d6SSatish Balay vals = buf; 1999cee3aa6bSSatish Balay mycols = ibuf; 2000ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2001ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2002a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 200357b952d6SSatish Balay 200457b952d6SSatish Balay /* insert into matrix */ 200557b952d6SSatish Balay jj = rstart*bs; 2006cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 200757b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 200857b952d6SSatish Balay mycols += locrowlens[i]; 200957b952d6SSatish Balay vals += locrowlens[i]; 201057b952d6SSatish Balay jj++; 201157b952d6SSatish Balay } 201257b952d6SSatish Balay } 201357b952d6SSatish Balay PetscFree(locrowlens); 201457b952d6SSatish Balay PetscFree(buf); 201557b952d6SSatish Balay PetscFree(ibuf); 201657b952d6SSatish Balay PetscFree(rowners); 201757b952d6SSatish Balay PetscFree(dlens); 2018cee3aa6bSSatish Balay PetscFree(mask); 20196d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 20206d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 20213a40ed3dSBarry Smith PetscFunctionReturn(0); 202257b952d6SSatish Balay } 202357b952d6SSatish Balay 202457b952d6SSatish Balay 2025