1*54750694SBarry Smith /*$Id: mpibaij.c,v 1.190 2000/04/10 04:25:49 bsmith Exp bsmith $*/ 279bdfe76SSatish Balay 377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 579bdfe76SSatish Balay 657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 1693fea6afSBarry Smith extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*); 1793fea6afSBarry Smith 1893fea6afSBarry Smith /* UGLY, ugly, ugly 1993fea6afSBarry Smith When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 2093fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2193fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2293fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2393fea6afSBarry Smith into the single precision data structures. 2493fea6afSBarry Smith */ 2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2693fea6afSBarry Smith extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27*54750694SBarry Smith extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28*54750694SBarry Smith extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 2993fea6afSBarry Smith #else 3093fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3193fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 3293fea6afSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3393fea6afSBarry Smith #endif 343b2fbd54SBarry Smith 357fc3c18eSBarry Smith EXTERN_C_BEGIN 367fc3c18eSBarry Smith #undef __FUNC__ 37b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ" 387fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat) 397fc3c18eSBarry Smith { 407fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 417fc3c18eSBarry Smith int ierr; 427fc3c18eSBarry Smith 437fc3c18eSBarry Smith PetscFunctionBegin; 447fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 457fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 467fc3c18eSBarry Smith PetscFunctionReturn(0); 477fc3c18eSBarry Smith } 487fc3c18eSBarry Smith EXTERN_C_END 497fc3c18eSBarry Smith 507fc3c18eSBarry Smith EXTERN_C_BEGIN 517fc3c18eSBarry Smith #undef __FUNC__ 52b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ" 537fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat) 547fc3c18eSBarry Smith { 557fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 567fc3c18eSBarry Smith int ierr; 577fc3c18eSBarry Smith 587fc3c18eSBarry Smith PetscFunctionBegin; 597fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 607fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 617fc3c18eSBarry Smith PetscFunctionReturn(0); 627fc3c18eSBarry Smith } 637fc3c18eSBarry Smith EXTERN_C_END 647fc3c18eSBarry Smith 65537820f0SBarry Smith /* 66537820f0SBarry Smith Local utility routine that creates a mapping from the global column 6757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 6857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 6957b952d6SSatish Balay length of colmap equals the global matrix length. 7057b952d6SSatish Balay */ 715615d1e5SSatish Balay #undef __FUNC__ 72b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private" 7357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 7457b952d6SSatish Balay { 7557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 7657b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 77dc2900e9SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 7857b952d6SSatish Balay 79d64ed03dSBarry Smith PetscFunctionBegin; 80aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 810f5bd95cSBarry Smith ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 8248e59246SSatish Balay for (i=0; i<nbs; i++){ 830f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 8448e59246SSatish Balay } 8548e59246SSatish Balay #else 86758f045eSSatish Balay baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 8757b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 88549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 89928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 9048e59246SSatish Balay #endif 913a40ed3dSBarry Smith PetscFunctionReturn(0); 9257b952d6SSatish Balay } 9357b952d6SSatish Balay 9480c1aa95SSatish Balay #define CHUNKSIZE 10 9580c1aa95SSatish Balay 96f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 9780c1aa95SSatish Balay { \ 9880c1aa95SSatish Balay \ 9980c1aa95SSatish Balay brow = row/bs; \ 10080c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 101ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 10280c1aa95SSatish Balay bcol = col/bs; \ 10380c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 104ab26458aSBarry Smith low = 0; high = nrow; \ 105ab26458aSBarry Smith while (high-low > 3) { \ 106ab26458aSBarry Smith t = (low+high)/2; \ 107ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 108ab26458aSBarry Smith else low = t; \ 109ab26458aSBarry Smith } \ 110ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11180c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 11280c1aa95SSatish Balay if (rp[_i] == bcol) { \ 11380c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 114eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 115eada6651SSatish Balay else *bap = value; \ 116ac7a638eSSatish Balay goto a_noinsert; \ 11780c1aa95SSatish Balay } \ 11880c1aa95SSatish Balay } \ 11989280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 120a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 12180c1aa95SSatish Balay if (nrow >= rmax) { \ 12280c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 12380c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1243eda8832SBarry Smith MatScalar *new_a; \ 12580c1aa95SSatish Balay \ 126a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 12789280ab3SLois Curfman McInnes \ 12880c1aa95SSatish Balay /* malloc new storage space */ \ 1293eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 1303eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 13180c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 13280c1aa95SSatish Balay new_i = new_j + new_nz; \ 13380c1aa95SSatish Balay \ 13480c1aa95SSatish Balay /* copy over old data into new slots */ \ 13580c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 13680c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 137549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 13880c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1393eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1403eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 142549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1433eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 14480c1aa95SSatish Balay /* free up old matrix storage */ \ 145606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 146606d414cSSatish Balay if (!a->singlemalloc) { \ 147606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 148606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 149606d414cSSatish Balay } \ 15080c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1517c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 15280c1aa95SSatish Balay \ 15380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 154ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 1553eda8832SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 15680c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 15780c1aa95SSatish Balay a->reallocs++; \ 15880c1aa95SSatish Balay a->nz++; \ 15980c1aa95SSatish Balay } \ 16080c1aa95SSatish Balay N = nrow++ - 1; \ 16180c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 16280c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 16380c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1643eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16580c1aa95SSatish Balay } \ 1663eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 16780c1aa95SSatish Balay rp[_i] = bcol; \ 16880c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 169ac7a638eSSatish Balay a_noinsert:; \ 17080c1aa95SSatish Balay ailen[brow] = nrow; \ 17180c1aa95SSatish Balay } 17257b952d6SSatish Balay 173ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 174ac7a638eSSatish Balay { \ 175ac7a638eSSatish Balay brow = row/bs; \ 176ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 177ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 178ac7a638eSSatish Balay bcol = col/bs; \ 179ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 180ac7a638eSSatish Balay low = 0; high = nrow; \ 181ac7a638eSSatish Balay while (high-low > 3) { \ 182ac7a638eSSatish Balay t = (low+high)/2; \ 183ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 184ac7a638eSSatish Balay else low = t; \ 185ac7a638eSSatish Balay } \ 186ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 187ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 188ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 189ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 190ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 191ac7a638eSSatish Balay else *bap = value; \ 192ac7a638eSSatish Balay goto b_noinsert; \ 193ac7a638eSSatish Balay } \ 194ac7a638eSSatish Balay } \ 19589280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 196a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 197ac7a638eSSatish Balay if (nrow >= rmax) { \ 198ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 19974c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2003eda8832SBarry Smith MatScalar *new_a; \ 201ac7a638eSSatish Balay \ 202a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 20389280ab3SLois Curfman McInnes \ 204ac7a638eSSatish Balay /* malloc new storage space */ \ 2053eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 2063eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 207ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 208ac7a638eSSatish Balay new_i = new_j + new_nz; \ 209ac7a638eSSatish Balay \ 210ac7a638eSSatish Balay /* copy over old data into new slots */ \ 211ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 21274c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 213549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 214ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2153eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2163eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2173eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 218549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2193eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 220ac7a638eSSatish Balay /* free up old matrix storage */ \ 221606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 222606d414cSSatish Balay if (!b->singlemalloc) { \ 223606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 224606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 225606d414cSSatish Balay } \ 22674c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2277c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 228ac7a638eSSatish Balay \ 229ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 230ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 2313eda8832SBarry Smith PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 23274c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 23374c639caSSatish Balay b->reallocs++; \ 23474c639caSSatish Balay b->nz++; \ 235ac7a638eSSatish Balay } \ 236ac7a638eSSatish Balay N = nrow++ - 1; \ 237ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 238ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 239ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2403eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 241ac7a638eSSatish Balay } \ 2423eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 243ac7a638eSSatish Balay rp[_i] = bcol; \ 244ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 245ac7a638eSSatish Balay b_noinsert:; \ 246ac7a638eSSatish Balay bilen[brow] = nrow; \ 247ac7a638eSSatish Balay } 248ac7a638eSSatish Balay 24993fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2505615d1e5SSatish Balay #undef __FUNC__ 251b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 252ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 25357b952d6SSatish Balay { 25493fea6afSBarry Smith int ierr,i,N = m*n; 25593fea6afSBarry Smith MatScalar *vsingle = (MatScalar*)v; 25693fea6afSBarry Smith 25793fea6afSBarry Smith PetscFunctionBegin; 25893fea6afSBarry Smith for (i=0; i<N; i++) { 25993fea6afSBarry Smith vsingle[i] = v[i]; 26093fea6afSBarry Smith } 26193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 26293fea6afSBarry Smith PetscFunctionReturn(0); 26393fea6afSBarry Smith } 26493fea6afSBarry Smith 26593fea6afSBarry Smith #undef __FUNC__ 26693fea6afSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 26793fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 26893fea6afSBarry Smith { 26993fea6afSBarry Smith int ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2; 27093fea6afSBarry Smith MatScalar *vsingle = (MatScalar*)v; 27193fea6afSBarry Smith 27293fea6afSBarry Smith PetscFunctionBegin; 27393fea6afSBarry Smith for (i=0; i<N; i++) { 27493fea6afSBarry Smith vsingle[i] = v[i]; 27593fea6afSBarry Smith } 27693fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 27793fea6afSBarry Smith PetscFunctionReturn(0); 27893fea6afSBarry Smith } 27993fea6afSBarry Smith #endif 28093fea6afSBarry Smith 28193fea6afSBarry Smith #undef __FUNC__ 28293fea6afSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 28393fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 28493fea6afSBarry Smith { 28557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 28693fea6afSBarry Smith MatScalar value; 2874fa0d573SSatish Balay int ierr,i,j,row,col; 2884fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 2894fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 2904fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 29157b952d6SSatish Balay 292eada6651SSatish Balay /* Some Variables required in the macro */ 29380c1aa95SSatish Balay Mat A = baij->A; 29480c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 295ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 2963eda8832SBarry Smith MatScalar *aa=a->a; 297ac7a638eSSatish Balay 298ac7a638eSSatish Balay Mat B = baij->B; 299ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 300ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3013eda8832SBarry Smith MatScalar *ba=b->a; 302ac7a638eSSatish Balay 303ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 304ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 3053eda8832SBarry Smith MatScalar *ap,*bap; 30680c1aa95SSatish Balay 307d64ed03dSBarry Smith PetscFunctionBegin; 30857b952d6SSatish Balay for (i=0; i<m; i++) { 3095ef9f2a5SBarry Smith if (im[i] < 0) continue; 310aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 311a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 312639f9d9dSBarry Smith #endif 31357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 31457b952d6SSatish Balay row = im[i] - rstart_orig; 31557b952d6SSatish Balay for (j=0; j<n; j++) { 31657b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 31757b952d6SSatish Balay col = in[j] - cstart_orig; 31857b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 319f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 32080c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 32173959e64SBarry Smith } else if (in[j] < 0) continue; 322aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 323a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 324639f9d9dSBarry Smith #endif 32557b952d6SSatish Balay else { 32657b952d6SSatish Balay if (mat->was_assembled) { 327905e6a2fSBarry Smith if (!baij->colmap) { 328905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 329905e6a2fSBarry Smith } 330aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3310f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 332fa46199cSSatish Balay col = col - 1 + in[j]%bs; 33348e59246SSatish Balay #else 334905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 33548e59246SSatish Balay #endif 33657b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 33757b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 33857b952d6SSatish Balay col = in[j]; 3399bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 3409bf004c3SSatish Balay B = baij->B; 3419bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 3429bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 3439bf004c3SSatish Balay ba=b->a; 34457b952d6SSatish Balay } 345d64ed03dSBarry Smith } else col = in[j]; 34657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 347ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 348ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 34957b952d6SSatish Balay } 35057b952d6SSatish Balay } 351d64ed03dSBarry Smith } else { 35290f02eecSBarry Smith if (!baij->donotstash) { 353ff2fd236SBarry Smith if (roworiented) { 3548798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 355ff2fd236SBarry Smith } else { 3568798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 35757b952d6SSatish Balay } 35857b952d6SSatish Balay } 35957b952d6SSatish Balay } 36090f02eecSBarry Smith } 3613a40ed3dSBarry Smith PetscFunctionReturn(0); 36257b952d6SSatish Balay } 36357b952d6SSatish Balay 364ab26458aSBarry Smith #undef __FUNC__ 365b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 36693fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 367ab26458aSBarry Smith { 368ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 36993fea6afSBarry Smith MatScalar *value,*barray=baij->barray; 3707ef1d9bdSSatish Balay int ierr,i,j,ii,jj,row,col; 371ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 372ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 373ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 374ab26458aSBarry Smith 375b16ae2b1SBarry Smith PetscFunctionBegin; 37630793edcSSatish Balay if(!barray) { 37793fea6afSBarry Smith baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 37830793edcSSatish Balay } 37930793edcSSatish Balay 380ab26458aSBarry Smith if (roworiented) { 381ab26458aSBarry Smith stepval = (n-1)*bs; 382ab26458aSBarry Smith } else { 383ab26458aSBarry Smith stepval = (m-1)*bs; 384ab26458aSBarry Smith } 385ab26458aSBarry Smith for (i=0; i<m; i++) { 3865ef9f2a5SBarry Smith if (im[i] < 0) continue; 387aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 3885ef9f2a5SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 389ab26458aSBarry Smith #endif 390ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 391ab26458aSBarry Smith row = im[i] - rstart; 392ab26458aSBarry Smith for (j=0; j<n; j++) { 39315b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 39415b57d14SSatish Balay if ((roworiented) && (n == 1)) { 39515b57d14SSatish Balay barray = v + i*bs2; 39615b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 39715b57d14SSatish Balay barray = v + j*bs2; 39815b57d14SSatish Balay } else { /* Here a copy is required */ 399ab26458aSBarry Smith if (roworiented) { 400ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 401ab26458aSBarry Smith } else { 402ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 403abef11f7SSatish Balay } 40447513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 40547513183SBarry Smith for (jj=0; jj<bs; jj++) { 40630793edcSSatish Balay *barray++ = *value++; 40747513183SBarry Smith } 40847513183SBarry Smith } 40930793edcSSatish Balay barray -=bs2; 41015b57d14SSatish Balay } 411abef11f7SSatish Balay 412abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 413abef11f7SSatish Balay col = in[j] - cstart; 41493fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 415ab26458aSBarry Smith } 4165ef9f2a5SBarry Smith else if (in[j] < 0) continue; 417aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4185ef9f2a5SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 419ab26458aSBarry Smith #endif 420ab26458aSBarry Smith else { 421ab26458aSBarry Smith if (mat->was_assembled) { 422ab26458aSBarry Smith if (!baij->colmap) { 423ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 424ab26458aSBarry Smith } 425a5eb4965SSatish Balay 426aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 427aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 428fa46199cSSatish Balay { int data; 4290f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4300f5bd95cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 431fa46199cSSatish Balay } 43248e59246SSatish Balay #else 4330f5bd95cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 434a5eb4965SSatish Balay #endif 43548e59246SSatish Balay #endif 436aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4370f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 438fa46199cSSatish Balay col = (col - 1)/bs; 43948e59246SSatish Balay #else 440a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 44148e59246SSatish Balay #endif 442ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 443ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 444ab26458aSBarry Smith col = in[j]; 445ab26458aSBarry Smith } 446ab26458aSBarry Smith } 447ab26458aSBarry Smith else col = in[j]; 44893fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 449ab26458aSBarry Smith } 450ab26458aSBarry Smith } 451d64ed03dSBarry Smith } else { 452ab26458aSBarry Smith if (!baij->donotstash) { 453ff2fd236SBarry Smith if (roworiented) { 4548798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 455ff2fd236SBarry Smith } else { 4568798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 457ff2fd236SBarry Smith } 458abef11f7SSatish Balay } 459ab26458aSBarry Smith } 460ab26458aSBarry Smith } 4613a40ed3dSBarry Smith PetscFunctionReturn(0); 462ab26458aSBarry Smith } 4630bdbc534SSatish Balay #define HASH_KEY 0.6180339887 464c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 465c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 466c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 4675615d1e5SSatish Balay #undef __FUNC__ 468b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT" 4690bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4700bdbc534SSatish Balay { 4710bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 4720bdbc534SSatish Balay int ierr,i,j,row,col; 4730bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 474c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 475c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 476329f5518SBarry Smith PetscReal tmp; 4773eda8832SBarry Smith MatScalar ** HD = baij->hd,value; 478aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4794a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4804a15367fSSatish Balay #endif 4810bdbc534SSatish Balay 4820bdbc534SSatish Balay PetscFunctionBegin; 4830bdbc534SSatish Balay 4840bdbc534SSatish Balay for (i=0; i<m; i++) { 485aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4860bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 4870bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 4880bdbc534SSatish Balay #endif 4890bdbc534SSatish Balay row = im[i]; 490c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4910bdbc534SSatish Balay for (j=0; j<n; j++) { 4920bdbc534SSatish Balay col = in[j]; 4933eda8832SBarry Smith if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m]; 4940bdbc534SSatish Balay /* Look up into the Hash Table */ 495c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 496c2760754SSatish Balay h1 = HASH(size,key,tmp); 4970bdbc534SSatish Balay 498c2760754SSatish Balay 499c2760754SSatish Balay idx = h1; 500aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 501187ce0cbSSatish Balay insert_ct++; 502187ce0cbSSatish Balay total_ct++; 503187ce0cbSSatish Balay if (HT[idx] != key) { 504187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 505187ce0cbSSatish Balay if (idx == size) { 506187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 507187ce0cbSSatish Balay if (idx == h1) { 508187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 509187ce0cbSSatish Balay } 510187ce0cbSSatish Balay } 511187ce0cbSSatish Balay } 512187ce0cbSSatish Balay #else 513c2760754SSatish Balay if (HT[idx] != key) { 514c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 515c2760754SSatish Balay if (idx == size) { 516c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 517c2760754SSatish Balay if (idx == h1) { 518c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 519c2760754SSatish Balay } 520c2760754SSatish Balay } 521c2760754SSatish Balay } 522187ce0cbSSatish Balay #endif 523c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 524c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 525c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5260bdbc534SSatish Balay } 5270bdbc534SSatish Balay } else { 5280bdbc534SSatish Balay if (!baij->donotstash) { 529ff2fd236SBarry Smith if (roworiented) { 5308798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 531ff2fd236SBarry Smith } else { 5328798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5330bdbc534SSatish Balay } 5340bdbc534SSatish Balay } 5350bdbc534SSatish Balay } 5360bdbc534SSatish Balay } 537aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 538187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 539187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 540187ce0cbSSatish Balay #endif 5410bdbc534SSatish Balay PetscFunctionReturn(0); 5420bdbc534SSatish Balay } 5430bdbc534SSatish Balay 5440bdbc534SSatish Balay #undef __FUNC__ 545b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 5460bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 5470bdbc534SSatish Balay { 5480bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 5498798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 5500bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 551b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 552c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 553329f5518SBarry Smith PetscReal tmp; 5543eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 5553eda8832SBarry Smith Scalar *v_t,*value; 556aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5574a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5584a15367fSSatish Balay #endif 5590bdbc534SSatish Balay 560d0a41580SSatish Balay PetscFunctionBegin; 561d0a41580SSatish Balay 5620bdbc534SSatish Balay if (roworiented) { 5630bdbc534SSatish Balay stepval = (n-1)*bs; 5640bdbc534SSatish Balay } else { 5650bdbc534SSatish Balay stepval = (m-1)*bs; 5660bdbc534SSatish Balay } 5670bdbc534SSatish Balay for (i=0; i<m; i++) { 568aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5690bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 5700bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 5710bdbc534SSatish Balay #endif 5720bdbc534SSatish Balay row = im[i]; 573187ce0cbSSatish Balay v_t = v + i*bs2; 574c2760754SSatish Balay if (row >= rstart && row < rend) { 5750bdbc534SSatish Balay for (j=0; j<n; j++) { 5760bdbc534SSatish Balay col = in[j]; 5770bdbc534SSatish Balay 5780bdbc534SSatish Balay /* Look up into the Hash Table */ 579c2760754SSatish Balay key = row*Nbs+col+1; 580c2760754SSatish Balay h1 = HASH(size,key,tmp); 5810bdbc534SSatish Balay 582c2760754SSatish Balay idx = h1; 583aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 584187ce0cbSSatish Balay total_ct++; 585187ce0cbSSatish Balay insert_ct++; 586187ce0cbSSatish Balay if (HT[idx] != key) { 587187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 588187ce0cbSSatish Balay if (idx == size) { 589187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 590187ce0cbSSatish Balay if (idx == h1) { 591187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 592187ce0cbSSatish Balay } 593187ce0cbSSatish Balay } 594187ce0cbSSatish Balay } 595187ce0cbSSatish Balay #else 596c2760754SSatish Balay if (HT[idx] != key) { 597c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 598c2760754SSatish Balay if (idx == size) { 599c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 600c2760754SSatish Balay if (idx == h1) { 601c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 602c2760754SSatish Balay } 603c2760754SSatish Balay } 604c2760754SSatish Balay } 605187ce0cbSSatish Balay #endif 606c2760754SSatish Balay baij_a = HD[idx]; 6070bdbc534SSatish Balay if (roworiented) { 608c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 609187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 610187ce0cbSSatish Balay value = v_t; 611187ce0cbSSatish Balay v_t += bs; 612fef45726SSatish Balay if (addv == ADD_VALUES) { 613c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 614c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 615fef45726SSatish Balay baij_a[jj] += *value++; 616b4cc0f5aSSatish Balay } 617b4cc0f5aSSatish Balay } 618fef45726SSatish Balay } else { 619c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 620c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 621fef45726SSatish Balay baij_a[jj] = *value++; 622fef45726SSatish Balay } 623fef45726SSatish Balay } 624fef45726SSatish Balay } 6250bdbc534SSatish Balay } else { 6260bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 627fef45726SSatish Balay if (addv == ADD_VALUES) { 628b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6290bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 630fef45726SSatish Balay baij_a[jj] += *value++; 631fef45726SSatish Balay } 632fef45726SSatish Balay } 633fef45726SSatish Balay } else { 634fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 635fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 636fef45726SSatish Balay baij_a[jj] = *value++; 637fef45726SSatish Balay } 638b4cc0f5aSSatish Balay } 6390bdbc534SSatish Balay } 6400bdbc534SSatish Balay } 6410bdbc534SSatish Balay } 6420bdbc534SSatish Balay } else { 6430bdbc534SSatish Balay if (!baij->donotstash) { 6440bdbc534SSatish Balay if (roworiented) { 6458798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6460bdbc534SSatish Balay } else { 6478798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6480bdbc534SSatish Balay } 6490bdbc534SSatish Balay } 6500bdbc534SSatish Balay } 6510bdbc534SSatish Balay } 652aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 653187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 654187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 655187ce0cbSSatish Balay #endif 6560bdbc534SSatish Balay PetscFunctionReturn(0); 6570bdbc534SSatish Balay } 658133cdb44SSatish Balay 6590bdbc534SSatish Balay #undef __FUNC__ 660b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 661ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 662d6de1c52SSatish Balay { 663d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 664d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 66548e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 666d6de1c52SSatish Balay 667133cdb44SSatish Balay PetscFunctionBegin; 668d6de1c52SSatish Balay for (i=0; i<m; i++) { 669a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 670a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 671d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 672d6de1c52SSatish Balay row = idxm[i] - bsrstart; 673d6de1c52SSatish Balay for (j=0; j<n; j++) { 674a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 675a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 676d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 677d6de1c52SSatish Balay col = idxn[j] - bscstart; 67898dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 679d64ed03dSBarry Smith } else { 680905e6a2fSBarry Smith if (!baij->colmap) { 681905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 682905e6a2fSBarry Smith } 683aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 6840f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 685fa46199cSSatish Balay data --; 68648e59246SSatish Balay #else 68748e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 68848e59246SSatish Balay #endif 68948e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 690d9d09a02SSatish Balay else { 69148e59246SSatish Balay col = data + idxn[j]%bs; 69298dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 693d6de1c52SSatish Balay } 694d6de1c52SSatish Balay } 695d6de1c52SSatish Balay } 696d64ed03dSBarry Smith } else { 697a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 698d6de1c52SSatish Balay } 699d6de1c52SSatish Balay } 7003a40ed3dSBarry Smith PetscFunctionReturn(0); 701d6de1c52SSatish Balay } 702d6de1c52SSatish Balay 7035615d1e5SSatish Balay #undef __FUNC__ 704b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 705329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 706d6de1c52SSatish Balay { 707d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 708d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 709acdf5bf4SSatish Balay int ierr,i,bs2=baij->bs2; 710329f5518SBarry Smith PetscReal sum = 0.0; 7113eda8832SBarry Smith MatScalar *v; 712d6de1c52SSatish Balay 713d64ed03dSBarry Smith PetscFunctionBegin; 714d6de1c52SSatish Balay if (baij->size == 1) { 715d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 716d6de1c52SSatish Balay } else { 717d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 718d6de1c52SSatish Balay v = amat->a; 719d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 720aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 721329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 722d6de1c52SSatish Balay #else 723d6de1c52SSatish Balay sum += (*v)*(*v); v++; 724d6de1c52SSatish Balay #endif 725d6de1c52SSatish Balay } 726d6de1c52SSatish Balay v = bmat->a; 727d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 728aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 729329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 730d6de1c52SSatish Balay #else 731d6de1c52SSatish Balay sum += (*v)*(*v); v++; 732d6de1c52SSatish Balay #endif 733d6de1c52SSatish Balay } 734ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 735d6de1c52SSatish Balay *norm = sqrt(*norm); 736d64ed03dSBarry Smith } else { 737e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 738d6de1c52SSatish Balay } 739d64ed03dSBarry Smith } 7403a40ed3dSBarry Smith PetscFunctionReturn(0); 741d6de1c52SSatish Balay } 74257b952d6SSatish Balay 743bd7f49f5SSatish Balay 744fef45726SSatish Balay /* 745fef45726SSatish Balay Creates the hash table, and sets the table 746fef45726SSatish Balay This table is created only once. 747fef45726SSatish Balay If new entried need to be added to the matrix 748fef45726SSatish Balay then the hash table has to be destroyed and 749fef45726SSatish Balay recreated. 750fef45726SSatish Balay */ 751fef45726SSatish Balay #undef __FUNC__ 752b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 753329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 754596b8d2eSBarry Smith { 755596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 756596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 757596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 7580bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 759549d3d68SSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 760187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 761fef45726SSatish Balay int *HT,key; 7623eda8832SBarry Smith MatScalar **HD; 763329f5518SBarry Smith PetscReal tmp; 764aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 7654a15367fSSatish Balay int ct=0,max=0; 7664a15367fSSatish Balay #endif 767fef45726SSatish Balay 768d64ed03dSBarry Smith PetscFunctionBegin; 7690bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 7700bdbc534SSatish Balay size = baij->ht_size; 771fef45726SSatish Balay 7720bdbc534SSatish Balay if (baij->ht) { 7730bdbc534SSatish Balay PetscFunctionReturn(0); 774596b8d2eSBarry Smith } 7750bdbc534SSatish Balay 776fef45726SSatish Balay /* Allocate Memory for Hash Table */ 7773eda8832SBarry Smith baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 778b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 779b9e4cc15SSatish Balay HD = baij->hd; 780a07cd24cSSatish Balay HT = baij->ht; 781b9e4cc15SSatish Balay 782b9e4cc15SSatish Balay 783549d3d68SSatish Balay ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 7840bdbc534SSatish Balay 785596b8d2eSBarry Smith 786596b8d2eSBarry Smith /* Loop Over A */ 7870bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 788596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 7890bdbc534SSatish Balay row = i+rstart; 7900bdbc534SSatish Balay col = aj[j]+cstart; 791596b8d2eSBarry Smith 792187ce0cbSSatish Balay key = row*Nbs + col + 1; 793c2760754SSatish Balay h1 = HASH(size,key,tmp); 7940bdbc534SSatish Balay for (k=0; k<size; k++){ 7950bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 7960bdbc534SSatish Balay HT[(h1+k)%size] = key; 7970bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 798596b8d2eSBarry Smith break; 799aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 800187ce0cbSSatish Balay } else { 801187ce0cbSSatish Balay ct++; 802187ce0cbSSatish Balay #endif 803596b8d2eSBarry Smith } 804187ce0cbSSatish Balay } 805aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 806187ce0cbSSatish Balay if (k> max) max = k; 807187ce0cbSSatish Balay #endif 808596b8d2eSBarry Smith } 809596b8d2eSBarry Smith } 810596b8d2eSBarry Smith /* Loop Over B */ 8110bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 812596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 8130bdbc534SSatish Balay row = i+rstart; 8140bdbc534SSatish Balay col = garray[bj[j]]; 815187ce0cbSSatish Balay key = row*Nbs + col + 1; 816c2760754SSatish Balay h1 = HASH(size,key,tmp); 8170bdbc534SSatish Balay for (k=0; k<size; k++){ 8180bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8190bdbc534SSatish Balay HT[(h1+k)%size] = key; 8200bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 821596b8d2eSBarry Smith break; 822aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 823187ce0cbSSatish Balay } else { 824187ce0cbSSatish Balay ct++; 825187ce0cbSSatish Balay #endif 826596b8d2eSBarry Smith } 827187ce0cbSSatish Balay } 828aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 829187ce0cbSSatish Balay if (k> max) max = k; 830187ce0cbSSatish Balay #endif 831596b8d2eSBarry Smith } 832596b8d2eSBarry Smith } 833596b8d2eSBarry Smith 834596b8d2eSBarry Smith /* Print Summary */ 835aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 836c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 837596b8d2eSBarry Smith if (HT[i]) {j++;} 838c38d4ed2SBarry Smith } 839329f5518SBarry Smith PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 840187ce0cbSSatish Balay #endif 8413a40ed3dSBarry Smith PetscFunctionReturn(0); 842596b8d2eSBarry Smith } 84357b952d6SSatish Balay 844bbb85fb3SSatish Balay #undef __FUNC__ 845b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 846bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 847bbb85fb3SSatish Balay { 848bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 849ff2fd236SBarry Smith int ierr,nstash,reallocs; 850bbb85fb3SSatish Balay InsertMode addv; 851bbb85fb3SSatish Balay 852bbb85fb3SSatish Balay PetscFunctionBegin; 853bbb85fb3SSatish Balay if (baij->donotstash) { 854bbb85fb3SSatish Balay PetscFunctionReturn(0); 855bbb85fb3SSatish Balay } 856bbb85fb3SSatish Balay 857bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 858bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 859bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 860bbb85fb3SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 861bbb85fb3SSatish Balay } 862bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 863bbb85fb3SSatish Balay 8648798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 8658798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 8668798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 8675a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 8688798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 8695a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 870bbb85fb3SSatish Balay PetscFunctionReturn(0); 871bbb85fb3SSatish Balay } 872bbb85fb3SSatish Balay 873bbb85fb3SSatish Balay #undef __FUNC__ 874b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 875bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 876bbb85fb3SSatish Balay { 877bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 878a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 879a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 8807c922b88SBarry Smith int *row,*col,other_disassembled; 8817c922b88SBarry Smith PetscTruth r1,r2,r3; 8823eda8832SBarry Smith MatScalar *val; 883bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 884bbb85fb3SSatish Balay 885bbb85fb3SSatish Balay PetscFunctionBegin; 886bbb85fb3SSatish Balay if (!baij->donotstash) { 887a2d1c673SSatish Balay while (1) { 8888798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 889a2d1c673SSatish Balay if (!flg) break; 890a2d1c673SSatish Balay 891bbb85fb3SSatish Balay for (i=0; i<n;) { 892bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 893bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 894bbb85fb3SSatish Balay if (j < n) ncols = j-i; 895bbb85fb3SSatish Balay else ncols = n-i; 896bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 89793fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 898bbb85fb3SSatish Balay i = j; 899bbb85fb3SSatish Balay } 900bbb85fb3SSatish Balay } 9018798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 902a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 903a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 904a2d1c673SSatish Balay restore the original flags */ 905a2d1c673SSatish Balay r1 = baij->roworiented; 906a2d1c673SSatish Balay r2 = a->roworiented; 907a2d1c673SSatish Balay r3 = b->roworiented; 9087c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 9097c922b88SBarry Smith a->roworiented = PETSC_FALSE; 9107c922b88SBarry Smith b->roworiented = PETSC_FALSE; 911a2d1c673SSatish Balay while (1) { 9128798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 913a2d1c673SSatish Balay if (!flg) break; 914a2d1c673SSatish Balay 915a2d1c673SSatish Balay for (i=0; i<n;) { 916a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 917a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 918a2d1c673SSatish Balay if (j < n) ncols = j-i; 919a2d1c673SSatish Balay else ncols = n-i; 92093fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 921a2d1c673SSatish Balay i = j; 922a2d1c673SSatish Balay } 923a2d1c673SSatish Balay } 9248798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 925a2d1c673SSatish Balay baij->roworiented = r1; 926a2d1c673SSatish Balay a->roworiented = r2; 927a2d1c673SSatish Balay b->roworiented = r3; 928bbb85fb3SSatish Balay } 929bbb85fb3SSatish Balay 930bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 931bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 932bbb85fb3SSatish Balay 933bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 934bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 935bbb85fb3SSatish Balay /* 936bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 937bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 938bbb85fb3SSatish Balay */ 939bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 940bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 941bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 942bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 943bbb85fb3SSatish Balay } 944bbb85fb3SSatish Balay } 945bbb85fb3SSatish Balay 946bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 947bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 948bbb85fb3SSatish Balay } 949bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 950bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 951bbb85fb3SSatish Balay 952aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 953bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 954c38d4ed2SBarry Smith PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 955bbb85fb3SSatish Balay baij->ht_total_ct = 0; 956bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 957bbb85fb3SSatish Balay } 958bbb85fb3SSatish Balay #endif 959bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 960bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 961bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 962bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 963bbb85fb3SSatish Balay } 964bbb85fb3SSatish Balay 965606d414cSSatish Balay if (baij->rowvalues) { 966606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 967606d414cSSatish Balay baij->rowvalues = 0; 968606d414cSSatish Balay } 969bbb85fb3SSatish Balay PetscFunctionReturn(0); 970bbb85fb3SSatish Balay } 97157b952d6SSatish Balay 9725615d1e5SSatish Balay #undef __FUNC__ 973b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 9747b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 97557b952d6SSatish Balay { 97657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 97777ed5343SBarry Smith int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 9786831982aSBarry Smith PetscTruth isascii,isdraw; 97957b952d6SSatish Balay 980d64ed03dSBarry Smith PetscFunctionBegin; 9816831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 9826831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 9830f5bd95cSBarry Smith if (isascii) { 984d41123aaSBarry Smith ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 985639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 9864e220ebcSLois Curfman McInnes MatInfo info; 987d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 988d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 9896831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 9904e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 9916831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 992d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 9936831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 994d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 9956831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 9966831982aSBarry Smith ierr = ViewerFlush(viewer);CHKERRQ(ierr); 99757b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 9983a40ed3dSBarry Smith PetscFunctionReturn(0); 999d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 10006831982aSBarry Smith ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 10013a40ed3dSBarry Smith PetscFunctionReturn(0); 100257b952d6SSatish Balay } 100357b952d6SSatish Balay } 100457b952d6SSatish Balay 10050f5bd95cSBarry Smith if (isdraw) { 100657b952d6SSatish Balay Draw draw; 100757b952d6SSatish Balay PetscTruth isnull; 100877ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 10093a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 101057b952d6SSatish Balay } 101157b952d6SSatish Balay 101257b952d6SSatish Balay if (size == 1) { 101357b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1014d64ed03dSBarry Smith } else { 101557b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 101657b952d6SSatish Balay Mat A; 101757b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 10183eda8832SBarry Smith int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 10193eda8832SBarry Smith MatScalar *a; 102057b952d6SSatish Balay 102157b952d6SSatish Balay if (!rank) { 102255843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1023d64ed03dSBarry Smith } else { 102455843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 102557b952d6SSatish Balay } 102657b952d6SSatish Balay PLogObjectParent(mat,A); 102757b952d6SSatish Balay 102857b952d6SSatish Balay /* copy over the A part */ 102957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 103057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 103157b952d6SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 103257b952d6SSatish Balay 103357b952d6SSatish Balay for (i=0; i<mbs; i++) { 103457b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 103557b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 103657b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 103757b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 103857b952d6SSatish Balay for (k=0; k<bs; k++) { 103993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1040cee3aa6bSSatish Balay col++; a += bs; 104157b952d6SSatish Balay } 104257b952d6SSatish Balay } 104357b952d6SSatish Balay } 104457b952d6SSatish Balay /* copy over the B part */ 104557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 104657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 104757b952d6SSatish Balay for (i=0; i<mbs; i++) { 104857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 104957b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 105057b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 105157b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 105257b952d6SSatish Balay for (k=0; k<bs; k++) { 105393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054cee3aa6bSSatish Balay col++; a += bs; 105557b952d6SSatish Balay } 105657b952d6SSatish Balay } 105757b952d6SSatish Balay } 1058606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 10596d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10606d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106155843e3eSBarry Smith /* 106255843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 106355843e3eSBarry Smith synchronized across all processors that share the Draw object 106455843e3eSBarry Smith */ 10656831982aSBarry Smith if (!rank) { 10666831982aSBarry Smith Viewer sviewer; 10676831982aSBarry Smith ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 10686831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 10696831982aSBarry Smith ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 107057b952d6SSatish Balay } 107157b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 107257b952d6SSatish Balay } 10733a40ed3dSBarry Smith PetscFunctionReturn(0); 107457b952d6SSatish Balay } 107557b952d6SSatish Balay 10765615d1e5SSatish Balay #undef __FUNC__ 1077b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1078e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 107957b952d6SSatish Balay { 108057b952d6SSatish Balay int ierr; 10816831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 108257b952d6SSatish Balay 1083d64ed03dSBarry Smith PetscFunctionBegin; 10846831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 10856831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 10866831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 10876831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 10880f5bd95cSBarry Smith if (isascii || isdraw || issocket || isbinary) { 10897b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 10905cd90555SBarry Smith } else { 10910f5bd95cSBarry Smith SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 109257b952d6SSatish Balay } 10933a40ed3dSBarry Smith PetscFunctionReturn(0); 109457b952d6SSatish Balay } 109557b952d6SSatish Balay 10965615d1e5SSatish Balay #undef __FUNC__ 1097b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1098e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 109979bdfe76SSatish Balay { 110079bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 110179bdfe76SSatish Balay int ierr; 110279bdfe76SSatish Balay 1103d64ed03dSBarry Smith PetscFunctionBegin; 110498dd23e9SBarry Smith 110598dd23e9SBarry Smith if (mat->mapping) { 110698dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 110798dd23e9SBarry Smith } 110898dd23e9SBarry Smith if (mat->bmapping) { 110998dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 111098dd23e9SBarry Smith } 111161b13de0SBarry Smith if (mat->rmap) { 111261b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 111361b13de0SBarry Smith } 111461b13de0SBarry Smith if (mat->cmap) { 111561b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 111661b13de0SBarry Smith } 1117aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1118e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 111979bdfe76SSatish Balay #endif 112079bdfe76SSatish Balay 11218798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 11228798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 11238798bf22SSatish Balay 1124606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 112579bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 112679bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1127aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 11280f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 112948e59246SSatish Balay #else 1130606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 113148e59246SSatish Balay #endif 1132606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1133606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1134606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1135606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1136606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1137606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1138606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 113979bdfe76SSatish Balay PLogObjectDestroy(mat); 114079bdfe76SSatish Balay PetscHeaderDestroy(mat); 11413a40ed3dSBarry Smith PetscFunctionReturn(0); 114279bdfe76SSatish Balay } 114379bdfe76SSatish Balay 11445615d1e5SSatish Balay #undef __FUNC__ 1145b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1146ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1147cee3aa6bSSatish Balay { 1148cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 114947b4a8eaSLois Curfman McInnes int ierr,nt; 1150cee3aa6bSSatish Balay 1151d64ed03dSBarry Smith PetscFunctionBegin; 1152e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 115347b4a8eaSLois Curfman McInnes if (nt != a->n) { 1154a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 115547b4a8eaSLois Curfman McInnes } 1156e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 115747b4a8eaSLois Curfman McInnes if (nt != a->m) { 1158a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 115947b4a8eaSLois Curfman McInnes } 116043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1161f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 116243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1163f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 116443a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 11653a40ed3dSBarry Smith PetscFunctionReturn(0); 1166cee3aa6bSSatish Balay } 1167cee3aa6bSSatish Balay 11685615d1e5SSatish Balay #undef __FUNC__ 1169b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1170ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1171cee3aa6bSSatish Balay { 1172cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1173cee3aa6bSSatish Balay int ierr; 1174d64ed03dSBarry Smith 1175d64ed03dSBarry Smith PetscFunctionBegin; 117643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1177f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 117843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1179f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 11803a40ed3dSBarry Smith PetscFunctionReturn(0); 1181cee3aa6bSSatish Balay } 1182cee3aa6bSSatish Balay 11835615d1e5SSatish Balay #undef __FUNC__ 1184b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 11857c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1186cee3aa6bSSatish Balay { 1187cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1188cee3aa6bSSatish Balay int ierr; 1189cee3aa6bSSatish Balay 1190d64ed03dSBarry Smith PetscFunctionBegin; 1191cee3aa6bSSatish Balay /* do nondiagonal part */ 11927c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1193cee3aa6bSSatish Balay /* send it on its way */ 1194537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1195cee3aa6bSSatish Balay /* do local part */ 11967c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1197cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1198cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1199cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1200639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12013a40ed3dSBarry Smith PetscFunctionReturn(0); 1202cee3aa6bSSatish Balay } 1203cee3aa6bSSatish Balay 12045615d1e5SSatish Balay #undef __FUNC__ 1205b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 12067c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1207cee3aa6bSSatish Balay { 1208cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1209cee3aa6bSSatish Balay int ierr; 1210cee3aa6bSSatish Balay 1211d64ed03dSBarry Smith PetscFunctionBegin; 1212cee3aa6bSSatish Balay /* do nondiagonal part */ 12137c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1214cee3aa6bSSatish Balay /* send it on its way */ 1215537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1216cee3aa6bSSatish Balay /* do local part */ 12177c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1218cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1219cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1220cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1221537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12223a40ed3dSBarry Smith PetscFunctionReturn(0); 1223cee3aa6bSSatish Balay } 1224cee3aa6bSSatish Balay 1225cee3aa6bSSatish Balay /* 1226cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1227cee3aa6bSSatish Balay diagonal block 1228cee3aa6bSSatish Balay */ 12295615d1e5SSatish Balay #undef __FUNC__ 1230b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1231ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1232cee3aa6bSSatish Balay { 1233cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 12343a40ed3dSBarry Smith int ierr; 1235d64ed03dSBarry Smith 1236d64ed03dSBarry Smith PetscFunctionBegin; 1237a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 12383a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 12393a40ed3dSBarry Smith PetscFunctionReturn(0); 1240cee3aa6bSSatish Balay } 1241cee3aa6bSSatish Balay 12425615d1e5SSatish Balay #undef __FUNC__ 1243b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1244ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1245cee3aa6bSSatish Balay { 1246cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1247cee3aa6bSSatish Balay int ierr; 1248d64ed03dSBarry Smith 1249d64ed03dSBarry Smith PetscFunctionBegin; 1250cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1251cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 12523a40ed3dSBarry Smith PetscFunctionReturn(0); 1253cee3aa6bSSatish Balay } 1254026e39d0SSatish Balay 12555615d1e5SSatish Balay #undef __FUNC__ 1256b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ" 1257ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 125857b952d6SSatish Balay { 125957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1260d64ed03dSBarry Smith 1261d64ed03dSBarry Smith PetscFunctionBegin; 1262bd7f49f5SSatish Balay if (m) *m = mat->M; 1263bd7f49f5SSatish Balay if (n) *n = mat->N; 12643a40ed3dSBarry Smith PetscFunctionReturn(0); 126557b952d6SSatish Balay } 126657b952d6SSatish Balay 12675615d1e5SSatish Balay #undef __FUNC__ 1268b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ" 1269ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 127057b952d6SSatish Balay { 127157b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1272d64ed03dSBarry Smith 1273d64ed03dSBarry Smith PetscFunctionBegin; 1274f830108cSBarry Smith *m = mat->m; *n = mat->n; 12753a40ed3dSBarry Smith PetscFunctionReturn(0); 127657b952d6SSatish Balay } 127757b952d6SSatish Balay 12785615d1e5SSatish Balay #undef __FUNC__ 1279b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1280ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 128157b952d6SSatish Balay { 128257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1283d64ed03dSBarry Smith 1284d64ed03dSBarry Smith PetscFunctionBegin; 1285a21fb8cbSBarry Smith if (m) *m = mat->rstart*mat->bs; 1286a21fb8cbSBarry Smith if (n) *n = mat->rend*mat->bs; 12873a40ed3dSBarry Smith PetscFunctionReturn(0); 128857b952d6SSatish Balay } 128957b952d6SSatish Balay 12905615d1e5SSatish Balay #undef __FUNC__ 1291b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1292acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1293acdf5bf4SSatish Balay { 1294acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1295acdf5bf4SSatish Balay Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1296acdf5bf4SSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1297d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1298d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1299acdf5bf4SSatish Balay 1300d64ed03dSBarry Smith PetscFunctionBegin; 1301a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1302acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1303acdf5bf4SSatish Balay 1304acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1305acdf5bf4SSatish Balay /* 1306acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1307acdf5bf4SSatish Balay */ 1308acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1309bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1310bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1311acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1312acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1313acdf5bf4SSatish Balay } 1314549d3d68SSatish Balay mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1315acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1316acdf5bf4SSatish Balay } 1317acdf5bf4SSatish Balay 1318a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1319d9d09a02SSatish Balay lrow = row - brstart; 1320acdf5bf4SSatish Balay 1321acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1322acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1323acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1324f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1325f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1326acdf5bf4SSatish Balay nztot = nzA + nzB; 1327acdf5bf4SSatish Balay 1328acdf5bf4SSatish Balay cmap = mat->garray; 1329acdf5bf4SSatish Balay if (v || idx) { 1330acdf5bf4SSatish Balay if (nztot) { 1331acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1332acdf5bf4SSatish Balay int imark = -1; 1333acdf5bf4SSatish Balay if (v) { 1334acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1335acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1336d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1337acdf5bf4SSatish Balay else break; 1338acdf5bf4SSatish Balay } 1339acdf5bf4SSatish Balay imark = i; 1340acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1341acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1342acdf5bf4SSatish Balay } 1343acdf5bf4SSatish Balay if (idx) { 1344acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1345acdf5bf4SSatish Balay if (imark > -1) { 1346acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1347bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1348acdf5bf4SSatish Balay } 1349acdf5bf4SSatish Balay } else { 1350acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1351d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1352d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1353acdf5bf4SSatish Balay else break; 1354acdf5bf4SSatish Balay } 1355acdf5bf4SSatish Balay imark = i; 1356acdf5bf4SSatish Balay } 1357d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1358d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1359acdf5bf4SSatish Balay } 1360d64ed03dSBarry Smith } else { 1361d212a18eSSatish Balay if (idx) *idx = 0; 1362d212a18eSSatish Balay if (v) *v = 0; 1363d212a18eSSatish Balay } 1364acdf5bf4SSatish Balay } 1365acdf5bf4SSatish Balay *nz = nztot; 1366f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1367f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 13683a40ed3dSBarry Smith PetscFunctionReturn(0); 1369acdf5bf4SSatish Balay } 1370acdf5bf4SSatish Balay 13715615d1e5SSatish Balay #undef __FUNC__ 1372b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1373acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1374acdf5bf4SSatish Balay { 1375acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1376d64ed03dSBarry Smith 1377d64ed03dSBarry Smith PetscFunctionBegin; 1378acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1379a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1380acdf5bf4SSatish Balay } 1381acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 13823a40ed3dSBarry Smith PetscFunctionReturn(0); 1383acdf5bf4SSatish Balay } 1384acdf5bf4SSatish Balay 13855615d1e5SSatish Balay #undef __FUNC__ 1386b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1387ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 13885a838052SSatish Balay { 13895a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1390d64ed03dSBarry Smith 1391d64ed03dSBarry Smith PetscFunctionBegin; 13925a838052SSatish Balay *bs = baij->bs; 13933a40ed3dSBarry Smith PetscFunctionReturn(0); 13945a838052SSatish Balay } 13955a838052SSatish Balay 13965615d1e5SSatish Balay #undef __FUNC__ 1397b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1398ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 139958667388SSatish Balay { 140058667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 140158667388SSatish Balay int ierr; 1402d64ed03dSBarry Smith 1403d64ed03dSBarry Smith PetscFunctionBegin; 140458667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 140558667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14063a40ed3dSBarry Smith PetscFunctionReturn(0); 140758667388SSatish Balay } 14080ac07820SSatish Balay 14095615d1e5SSatish Balay #undef __FUNC__ 1410b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1411ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14120ac07820SSatish Balay { 14134e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14144e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 14157d57db60SLois Curfman McInnes int ierr; 1416329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14170ac07820SSatish Balay 1418d64ed03dSBarry Smith PetscFunctionBegin; 14194e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 14204e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14210e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1422de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14234e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14240e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1425de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14260ac07820SSatish Balay if (flag == MAT_LOCAL) { 14274e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14284e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14294e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14304e220ebcSLois Curfman McInnes info->memory = isend[3]; 14314e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14320ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1433f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 14344e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14354e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14364e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14374e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14384e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14390ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1440f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 14414e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14424e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14434e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14444e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14454e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1446d41123aaSBarry Smith } else { 1447d41123aaSBarry Smith SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 14480ac07820SSatish Balay } 14495a5d4f66SBarry Smith info->rows_global = (double)a->M; 14505a5d4f66SBarry Smith info->columns_global = (double)a->N; 14515a5d4f66SBarry Smith info->rows_local = (double)a->m; 14525a5d4f66SBarry Smith info->columns_local = (double)a->N; 14534e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14544e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14554e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14563a40ed3dSBarry Smith PetscFunctionReturn(0); 14570ac07820SSatish Balay } 14580ac07820SSatish Balay 14595615d1e5SSatish Balay #undef __FUNC__ 1460b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1461ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 146258667388SSatish Balay { 146358667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 146498305bb5SBarry Smith int ierr; 146558667388SSatish Balay 1466d64ed03dSBarry Smith PetscFunctionBegin; 146758667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 146858667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 14696da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1470c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 14714787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 14727c922b88SBarry Smith op == MAT_KEEP_ZEROED_ROWS || 14734787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 147498305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 147598305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1476b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 14777c922b88SBarry Smith a->roworiented = PETSC_TRUE; 147898305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 147998305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1480b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 14816da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 148258667388SSatish Balay op == MAT_SYMMETRIC || 148358667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1484b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 148598305bb5SBarry Smith op == MAT_USE_HASH_TABLE) { 148658667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 148798305bb5SBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 14887c922b88SBarry Smith a->roworiented = PETSC_FALSE; 148998305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 149098305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 14912b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 14927c922b88SBarry Smith a->donotstash = PETSC_TRUE; 1493d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1494d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1495133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 14967c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 1497d64ed03dSBarry Smith } else { 1498d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1499d64ed03dSBarry Smith } 15003a40ed3dSBarry Smith PetscFunctionReturn(0); 150158667388SSatish Balay } 150258667388SSatish Balay 15035615d1e5SSatish Balay #undef __FUNC__ 1504b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1505ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15060ac07820SSatish Balay { 15070ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15080ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15090ac07820SSatish Balay Mat B; 151040011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 15110ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15123eda8832SBarry Smith MatScalar *a; 15130ac07820SSatish Balay 1514d64ed03dSBarry Smith PetscFunctionBegin; 15157c922b88SBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 15167c922b88SBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 15170ac07820SSatish Balay 15180ac07820SSatish Balay /* copy over the A part */ 15190ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15200ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15210ac07820SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 15220ac07820SSatish Balay 15230ac07820SSatish Balay for (i=0; i<mbs; i++) { 15240ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15250ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15260ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15270ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15280ac07820SSatish Balay for (k=0; k<bs; k++) { 152993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15300ac07820SSatish Balay col++; a += bs; 15310ac07820SSatish Balay } 15320ac07820SSatish Balay } 15330ac07820SSatish Balay } 15340ac07820SSatish Balay /* copy over the B part */ 15350ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 15360ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15370ac07820SSatish Balay for (i=0; i<mbs; i++) { 15380ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15390ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15400ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15410ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 15420ac07820SSatish Balay for (k=0; k<bs; k++) { 154393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15440ac07820SSatish Balay col++; a += bs; 15450ac07820SSatish Balay } 15460ac07820SSatish Balay } 15470ac07820SSatish Balay } 1548606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 15490ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15500ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15510ac07820SSatish Balay 15527c922b88SBarry Smith if (matout) { 15530ac07820SSatish Balay *matout = B; 15540ac07820SSatish Balay } else { 1555f830108cSBarry Smith PetscOps *Abops; 1556cc2dc46cSBarry Smith MatOps Aops; 1557f830108cSBarry Smith 15580ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 1559606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 15600ac07820SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 15610ac07820SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1562aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 15630f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1564b1fc9764SSatish Balay #else 1565606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1566b1fc9764SSatish Balay #endif 1567606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1568606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1569606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1570606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1571f830108cSBarry Smith 1572f830108cSBarry Smith /* 1573f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1574f830108cSBarry Smith A pointers for the bops and ops but copy everything 1575f830108cSBarry Smith else from C. 1576f830108cSBarry Smith */ 1577f830108cSBarry Smith Abops = A->bops; 1578f830108cSBarry Smith Aops = A->ops; 1579549d3d68SSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1580f830108cSBarry Smith A->bops = Abops; 1581f830108cSBarry Smith A->ops = Aops; 1582f830108cSBarry Smith 15830ac07820SSatish Balay PetscHeaderDestroy(B); 15840ac07820SSatish Balay } 15853a40ed3dSBarry Smith PetscFunctionReturn(0); 15860ac07820SSatish Balay } 15870e95ebc0SSatish Balay 15885615d1e5SSatish Balay #undef __FUNC__ 1589b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 159036c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 15910e95ebc0SSatish Balay { 159236c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 159336c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 15940e95ebc0SSatish Balay int ierr,s1,s2,s3; 15950e95ebc0SSatish Balay 1596d64ed03dSBarry Smith PetscFunctionBegin; 159736c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 159836c4a09eSSatish Balay if (rr) { 159936c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 160036c4a09eSSatish Balay if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 160136c4a09eSSatish Balay /* Overlap communication with computation. */ 160236c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 160336c4a09eSSatish Balay } 16040e95ebc0SSatish Balay if (ll) { 16050e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 160636c4a09eSSatish Balay if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1607a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16080e95ebc0SSatish Balay } 160936c4a09eSSatish Balay /* scale the diagonal block */ 161036c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 161136c4a09eSSatish Balay 161236c4a09eSSatish Balay if (rr) { 161336c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 161436c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1615a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 161636c4a09eSSatish Balay } 161736c4a09eSSatish Balay 16183a40ed3dSBarry Smith PetscFunctionReturn(0); 16190e95ebc0SSatish Balay } 16200e95ebc0SSatish Balay 16215615d1e5SSatish Balay #undef __FUNC__ 1622b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1623ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 16240ac07820SSatish Balay { 16250ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16260ac07820SSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1627a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 16280ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16290ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1630a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16310ac07820SSatish Balay MPI_Comm comm = A->comm; 16320ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16330ac07820SSatish Balay MPI_Status recv_status,*send_status; 16340ac07820SSatish Balay IS istmp; 16350ac07820SSatish Balay 1636d64ed03dSBarry Smith PetscFunctionBegin; 16370ac07820SSatish Balay ierr = ISGetSize(is,&N);CHKERRQ(ierr); 16380ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 16390ac07820SSatish Balay 16400ac07820SSatish Balay /* first count number of contributors to each processor */ 16410ac07820SSatish Balay nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1642549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1643549d3d68SSatish Balay procs = nprocs + size; 16440ac07820SSatish Balay owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 16450ac07820SSatish Balay for (i=0; i<N; i++) { 16460ac07820SSatish Balay idx = rows[i]; 16470ac07820SSatish Balay found = 0; 16480ac07820SSatish Balay for (j=0; j<size; j++) { 16490ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 16500ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 16510ac07820SSatish Balay } 16520ac07820SSatish Balay } 1653a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 16540ac07820SSatish Balay } 16550ac07820SSatish Balay nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 16560ac07820SSatish Balay 16570ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 16586831982aSBarry Smith work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 16596831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 16600ac07820SSatish Balay nmax = work[rank]; 16616831982aSBarry Smith nrecvs = work[size+rank]; 1662606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 16630ac07820SSatish Balay 16640ac07820SSatish Balay /* post receives: */ 1665d64ed03dSBarry Smith rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1666d64ed03dSBarry Smith recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 16670ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1668ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 16690ac07820SSatish Balay } 16700ac07820SSatish Balay 16710ac07820SSatish Balay /* do sends: 16720ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 16730ac07820SSatish Balay the ith processor 16740ac07820SSatish Balay */ 16750ac07820SSatish Balay svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1676ca161407SBarry Smith send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 16770ac07820SSatish Balay starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 16780ac07820SSatish Balay starts[0] = 0; 16790ac07820SSatish Balay for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 16800ac07820SSatish Balay for (i=0; i<N; i++) { 16810ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 16820ac07820SSatish Balay } 16836831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 16840ac07820SSatish Balay 16850ac07820SSatish Balay starts[0] = 0; 16860ac07820SSatish Balay for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 16870ac07820SSatish Balay count = 0; 16880ac07820SSatish Balay for (i=0; i<size; i++) { 16890ac07820SSatish Balay if (procs[i]) { 1690ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 16910ac07820SSatish Balay } 16920ac07820SSatish Balay } 1693606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 16940ac07820SSatish Balay 16950ac07820SSatish Balay base = owners[rank]*bs; 16960ac07820SSatish Balay 16970ac07820SSatish Balay /* wait on receives */ 16980ac07820SSatish Balay lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 16990ac07820SSatish Balay source = lens + nrecvs; 17000ac07820SSatish Balay count = nrecvs; slen = 0; 17010ac07820SSatish Balay while (count) { 1702ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17030ac07820SSatish Balay /* unpack receives into our local space */ 1704ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17050ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17060ac07820SSatish Balay lens[imdex] = n; 17070ac07820SSatish Balay slen += n; 17080ac07820SSatish Balay count--; 17090ac07820SSatish Balay } 1710606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17110ac07820SSatish Balay 17120ac07820SSatish Balay /* move the data into the send scatter */ 17130ac07820SSatish Balay lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 17140ac07820SSatish Balay count = 0; 17150ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17160ac07820SSatish Balay values = rvalues + i*nmax; 17170ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17180ac07820SSatish Balay lrows[count++] = values[j] - base; 17190ac07820SSatish Balay } 17200ac07820SSatish Balay } 1721606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1722606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1723606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1724606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17250ac07820SSatish Balay 17260ac07820SSatish Balay /* actually zap the local rows */ 1727029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 17280ac07820SSatish Balay PLogObjectParent(A,istmp); 1729a07cd24cSSatish Balay 173072dacd9aSBarry Smith /* 173172dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 173272dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 173372dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 173472dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 173572dacd9aSBarry Smith 173672dacd9aSBarry Smith Contributed by: Mathew Knepley 173772dacd9aSBarry Smith */ 17389c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 17393eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr); 17409c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 17413eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr); 17429c957beeSSatish Balay } else if (diag) { 17433eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1744fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1745fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1746fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 17476525c446SSatish Balay } 1748a07cd24cSSatish Balay for (i = 0; i < slen; i++) { 1749a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 17503eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1751a07cd24cSSatish Balay } 1752a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1753a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17549c957beeSSatish Balay } else { 17553eda8832SBarry Smith ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1756a07cd24cSSatish Balay } 17579c957beeSSatish Balay 17589c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1759606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1760a07cd24cSSatish Balay 17610ac07820SSatish Balay /* wait on sends */ 17620ac07820SSatish Balay if (nsends) { 1763d64ed03dSBarry Smith send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1764ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1765606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 17660ac07820SSatish Balay } 1767606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1768606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 17690ac07820SSatish Balay 17703a40ed3dSBarry Smith PetscFunctionReturn(0); 17710ac07820SSatish Balay } 177272dacd9aSBarry Smith 17735615d1e5SSatish Balay #undef __FUNC__ 1774b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1775ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1776ba4ca20aSSatish Balay { 1777ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 177825fdafccSSatish Balay MPI_Comm comm = A->comm; 1779133cdb44SSatish Balay static int called = 0; 17803a40ed3dSBarry Smith int ierr; 1781ba4ca20aSSatish Balay 1782d64ed03dSBarry Smith PetscFunctionBegin; 17833a40ed3dSBarry Smith if (!a->rank) { 17843a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 178525fdafccSSatish Balay } 178625fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1787d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1788d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 17893a40ed3dSBarry Smith PetscFunctionReturn(0); 1790ba4ca20aSSatish Balay } 17910ac07820SSatish Balay 17925615d1e5SSatish Balay #undef __FUNC__ 1793b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1794ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1795bb5a7306SBarry Smith { 1796bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1797bb5a7306SBarry Smith int ierr; 1798d64ed03dSBarry Smith 1799d64ed03dSBarry Smith PetscFunctionBegin; 1800bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18013a40ed3dSBarry Smith PetscFunctionReturn(0); 1802bb5a7306SBarry Smith } 1803bb5a7306SBarry Smith 18042e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18050ac07820SSatish Balay 18067fc3c18eSBarry Smith #undef __FUNC__ 1807b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 18087fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18097fc3c18eSBarry Smith { 18107fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18117fc3c18eSBarry Smith Mat a,b,c,d; 18127fc3c18eSBarry Smith PetscTruth flg; 18137fc3c18eSBarry Smith int ierr; 18147fc3c18eSBarry Smith 18157fc3c18eSBarry Smith PetscFunctionBegin; 18167fc3c18eSBarry Smith if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 18177fc3c18eSBarry Smith a = matA->A; b = matA->B; 18187fc3c18eSBarry Smith c = matB->A; d = matB->B; 18197fc3c18eSBarry Smith 18207fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18217fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18227fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18237fc3c18eSBarry Smith } 18247fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18257fc3c18eSBarry Smith PetscFunctionReturn(0); 18267fc3c18eSBarry Smith } 18277fc3c18eSBarry Smith 182879bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1829cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1830cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1831cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1832cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1833cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1834cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 18357c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 18367c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1837cc2dc46cSBarry Smith 0, 1838cc2dc46cSBarry Smith 0, 1839cc2dc46cSBarry Smith 0, 1840cc2dc46cSBarry Smith 0, 1841cc2dc46cSBarry Smith 0, 1842cc2dc46cSBarry Smith 0, 1843cc2dc46cSBarry Smith 0, 1844cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1845cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 18467fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1847cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1848cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1849cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1850cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1851cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1852cc2dc46cSBarry Smith 0, 1853cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1854cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1855cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1856cc2dc46cSBarry Smith 0, 1857cc2dc46cSBarry Smith 0, 1858cc2dc46cSBarry Smith 0, 1859cc2dc46cSBarry Smith 0, 1860cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1861cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1862cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1863cc2dc46cSBarry Smith 0, 1864cc2dc46cSBarry Smith 0, 1865cc2dc46cSBarry Smith 0, 1866cc2dc46cSBarry Smith 0, 18672e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1868cc2dc46cSBarry Smith 0, 1869cc2dc46cSBarry Smith 0, 1870cc2dc46cSBarry Smith 0, 1871cc2dc46cSBarry Smith 0, 1872cc2dc46cSBarry Smith 0, 1873cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1874cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1875cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1876cc2dc46cSBarry Smith 0, 1877cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1878cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1879cc2dc46cSBarry Smith 0, 1880cc2dc46cSBarry Smith 0, 1881cc2dc46cSBarry Smith 0, 1882cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1883cc2dc46cSBarry Smith 0, 1884cc2dc46cSBarry Smith 0, 1885cc2dc46cSBarry Smith 0, 1886cc2dc46cSBarry Smith 0, 1887cc2dc46cSBarry Smith 0, 1888cc2dc46cSBarry Smith 0, 1889cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1890cc2dc46cSBarry Smith 0, 1891cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1892cc2dc46cSBarry Smith 0, 1893cc2dc46cSBarry Smith 0, 1894cc2dc46cSBarry Smith 0, 1895cc2dc46cSBarry Smith MatGetMaps_Petsc}; 189679bdfe76SSatish Balay 18975ef9f2a5SBarry Smith 1898e18c124aSSatish Balay EXTERN_C_BEGIN 18995ef9f2a5SBarry Smith #undef __FUNC__ 1900b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 19015ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19025ef9f2a5SBarry Smith { 19035ef9f2a5SBarry Smith PetscFunctionBegin; 19045ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19055ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19065ef9f2a5SBarry Smith PetscFunctionReturn(0); 19075ef9f2a5SBarry Smith } 1908e18c124aSSatish Balay EXTERN_C_END 190979bdfe76SSatish Balay 19105615d1e5SSatish Balay #undef __FUNC__ 1911b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 191279bdfe76SSatish Balay /*@C 191379bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 191479bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 191579bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 191679bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 191779bdfe76SSatish Balay performance can be increased by more than a factor of 50. 191879bdfe76SSatish Balay 1919db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1920db81eaa0SLois Curfman McInnes 192179bdfe76SSatish Balay Input Parameters: 1922db81eaa0SLois Curfman McInnes + comm - MPI communicator 192379bdfe76SSatish Balay . bs - size of blockk 192479bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 192592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 192692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 192792e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 192892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 192992e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1930be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1931be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 193279bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 193379bdfe76SSatish Balay submatrix (same for all local rows) 19347fc3c18eSBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 193592e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 1936db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 193792e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 193879bdfe76SSatish Balay submatrix (same for all local rows). 19397fc3c18eSBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 194092e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 194192e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 194279bdfe76SSatish Balay 194379bdfe76SSatish Balay Output Parameter: 194479bdfe76SSatish Balay . A - the matrix 194579bdfe76SSatish Balay 1946db81eaa0SLois Curfman McInnes Options Database Keys: 1947db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 1948db81eaa0SLois Curfman McInnes block calculations (much slower) 1949db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 1950494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 1951494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 19523ffaccefSLois Curfman McInnes 1953b259b22eSLois Curfman McInnes Notes: 195479bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 195579bdfe76SSatish Balay (possibly both). 195679bdfe76SSatish Balay 1957be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1958be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 1959be79a94dSBarry Smith 196079bdfe76SSatish Balay Storage Information: 196179bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 196279bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 196379bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 196479bdfe76SSatish Balay local matrix (a rectangular submatrix). 196579bdfe76SSatish Balay 196679bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 196779bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 196879bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 196979bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 197079bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 197179bdfe76SSatish Balay 197279bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 197379bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 197479bdfe76SSatish Balay 1975db81eaa0SLois Curfman McInnes .vb 1976db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 1977db81eaa0SLois Curfman McInnes ------------------- 1978db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 1979db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 1980db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 1981db81eaa0SLois Curfman McInnes ------------------- 1982db81eaa0SLois Curfman McInnes .ve 198379bdfe76SSatish Balay 198479bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 198579bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 198679bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 198757b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 198879bdfe76SSatish Balay 1989d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1990d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 199179bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 199292e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 199392e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 19946da5968aSLois Curfman McInnes matrices. 199579bdfe76SSatish Balay 1996027ccd11SLois Curfman McInnes Level: intermediate 1997027ccd11SLois Curfman McInnes 199892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 199979bdfe76SSatish Balay 20003eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 200179bdfe76SSatish Balay @*/ 2002329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 200379bdfe76SSatish Balay { 200479bdfe76SSatish Balay Mat B; 200579bdfe76SSatish Balay Mat_MPIBAIJ *b; 2006f1af5d2fSBarry Smith int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2007f1af5d2fSBarry Smith PetscTruth flag1,flag2,flg; 200879bdfe76SSatish Balay 2009d64ed03dSBarry Smith PetscFunctionBegin; 2010962fb4a0SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2011962fb4a0SBarry Smith 2012a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 201336c4a09eSSatish Balay if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 201436c4a09eSSatish Balay if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 20154fdb0a08SBarry Smith if (d_nnz) { 201636c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 20174fdb0a08SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 20184fdb0a08SBarry Smith } 20194fdb0a08SBarry Smith } 20204fdb0a08SBarry Smith if (o_nnz) { 202136c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 20224fdb0a08SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 20234fdb0a08SBarry Smith } 20244fdb0a08SBarry Smith } 20253914022bSBarry Smith 2026d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2027494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2028494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2029494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 20303914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 20313914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 20323914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 20333a40ed3dSBarry Smith PetscFunctionReturn(0); 20343914022bSBarry Smith } 20353914022bSBarry Smith 203679bdfe76SSatish Balay *A = 0; 20373f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 203879bdfe76SSatish Balay PLogObjectCreate(B); 203979bdfe76SSatish Balay B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2040549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2041549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 20424c50302cSBarry Smith 2043e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 2044e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 204590f02eecSBarry Smith B->mapping = 0; 204679bdfe76SSatish Balay B->factor = 0; 204779bdfe76SSatish Balay B->assembled = PETSC_FALSE; 204879bdfe76SSatish Balay 2049e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 2050d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2051d132466eSBarry Smith ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 205279bdfe76SSatish Balay 20537c922b88SBarry Smith if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2054a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2055d64ed03dSBarry Smith } 2056a8c6a408SBarry Smith if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2057a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2058a8c6a408SBarry Smith } 2059a8c6a408SBarry Smith if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2060a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2061a8c6a408SBarry Smith } 2062cee3aa6bSSatish Balay if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2063cee3aa6bSSatish Balay if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 206479bdfe76SSatish Balay 206579bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 206679bdfe76SSatish Balay work[0] = m; work[1] = n; 206779bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 2068ca161407SBarry Smith ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 206979bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 207079bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 207179bdfe76SSatish Balay } 207279bdfe76SSatish Balay if (m == PETSC_DECIDE) { 207379bdfe76SSatish Balay Mbs = M/bs; 2074a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 207579bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 207679bdfe76SSatish Balay m = mbs*bs; 207779bdfe76SSatish Balay } 207879bdfe76SSatish Balay if (n == PETSC_DECIDE) { 207979bdfe76SSatish Balay Nbs = N/bs; 2080a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 208179bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 208279bdfe76SSatish Balay n = nbs*bs; 208379bdfe76SSatish Balay } 2084a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 2085a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2086a8c6a408SBarry Smith } 208779bdfe76SSatish Balay 208879bdfe76SSatish Balay b->m = m; B->m = m; 208979bdfe76SSatish Balay b->n = n; B->n = n; 209079bdfe76SSatish Balay b->N = N; B->N = N; 209179bdfe76SSatish Balay b->M = M; B->M = M; 209279bdfe76SSatish Balay b->bs = bs; 209379bdfe76SSatish Balay b->bs2 = bs*bs; 209479bdfe76SSatish Balay b->mbs = mbs; 209579bdfe76SSatish Balay b->nbs = nbs; 209679bdfe76SSatish Balay b->Mbs = Mbs; 209779bdfe76SSatish Balay b->Nbs = Nbs; 209879bdfe76SSatish Balay 2099c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2100c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 2101488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2102488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2103c7fcc2eaSBarry Smith 210479bdfe76SSatish Balay /* build local table of row and column ownerships */ 2105ff2fd236SBarry Smith b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2106ff2fd236SBarry Smith PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 21070ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2108ff2fd236SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2109ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 211079bdfe76SSatish Balay b->rowners[0] = 0; 211179bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 211279bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 211379bdfe76SSatish Balay } 2114ff2fd236SBarry Smith for (i=0; i<=b->size; i++) { 2115ff2fd236SBarry Smith b->rowners_bs[i] = b->rowners[i]*bs; 2116ff2fd236SBarry Smith } 211779bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 211879bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 21194fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 21204fa0d573SSatish Balay b->rend_bs = b->rend * bs; 21214fa0d573SSatish Balay 2122ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 212379bdfe76SSatish Balay b->cowners[0] = 0; 212479bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 212579bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 212679bdfe76SSatish Balay } 212779bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 212879bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 21294fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 21304fa0d573SSatish Balay b->cend_bs = b->cend * bs; 213179bdfe76SSatish Balay 2132a07cd24cSSatish Balay 213379bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2134029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 213579bdfe76SSatish Balay PLogObjectParent(B,b->A); 213679bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2137029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 213879bdfe76SSatish Balay PLogObjectParent(B,b->B); 213979bdfe76SSatish Balay 214079bdfe76SSatish Balay /* build cache for off array entries formed */ 21418798bf22SSatish Balay ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 21428798bf22SSatish Balay ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 21437c922b88SBarry Smith b->donotstash = PETSC_FALSE; 21447c922b88SBarry Smith b->colmap = PETSC_NULL; 21457c922b88SBarry Smith b->garray = PETSC_NULL; 21467c922b88SBarry Smith b->roworiented = PETSC_TRUE; 214779bdfe76SSatish Balay 214830793edcSSatish Balay /* stuff used in block assembly */ 214930793edcSSatish Balay b->barray = 0; 215030793edcSSatish Balay 215179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 215279bdfe76SSatish Balay b->lvec = 0; 215379bdfe76SSatish Balay b->Mvctx = 0; 215479bdfe76SSatish Balay 215579bdfe76SSatish Balay /* stuff for MatGetRow() */ 215679bdfe76SSatish Balay b->rowindices = 0; 215779bdfe76SSatish Balay b->rowvalues = 0; 215879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 215979bdfe76SSatish Balay 2160a07cd24cSSatish Balay /* hash table stuff */ 2161a07cd24cSSatish Balay b->ht = 0; 2162187ce0cbSSatish Balay b->hd = 0; 21630bdbc534SSatish Balay b->ht_size = 0; 21647c922b88SBarry Smith b->ht_flag = PETSC_FALSE; 216525fdafccSSatish Balay b->ht_fact = 0; 2166187ce0cbSSatish Balay b->ht_total_ct = 0; 2167187ce0cbSSatish Balay b->ht_insert_ct = 0; 2168a07cd24cSSatish Balay 216979bdfe76SSatish Balay *A = B; 2170133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2171133cdb44SSatish Balay if (flg) { 2172133cdb44SSatish Balay double fact = 1.39; 2173133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2174f1af5d2fSBarry Smith ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2175133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2176133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2177133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2178133cdb44SSatish Balay } 2179f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 21807fc3c18eSBarry Smith "MatStoreValues_MPIBAIJ", 21817fc3c18eSBarry Smith (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2182f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 21837fc3c18eSBarry Smith "MatRetrieveValues_MPIBAIJ", 21847fc3c18eSBarry Smith (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2185f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 21865ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 21875ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 21883a40ed3dSBarry Smith PetscFunctionReturn(0); 218979bdfe76SSatish Balay } 2190026e39d0SSatish Balay 21915615d1e5SSatish Balay #undef __FUNC__ 2192b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 21932e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 21940ac07820SSatish Balay { 21950ac07820SSatish Balay Mat mat; 21960ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2197f1af5d2fSBarry Smith int ierr,len=0; 2198f1af5d2fSBarry Smith PetscTruth flg; 21990ac07820SSatish Balay 2200d64ed03dSBarry Smith PetscFunctionBegin; 22010ac07820SSatish Balay *newmat = 0; 22023f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 22030ac07820SSatish Balay PLogObjectCreate(mat); 22040ac07820SSatish Balay mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2205549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2206e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2207e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 22080ac07820SSatish Balay mat->factor = matin->factor; 22090ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2210aef5e8e0SSatish Balay mat->insertmode = NOT_SET_VALUES; 22110ac07820SSatish Balay 22120ac07820SSatish Balay a->m = mat->m = oldmat->m; 22130ac07820SSatish Balay a->n = mat->n = oldmat->n; 22140ac07820SSatish Balay a->M = mat->M = oldmat->M; 22150ac07820SSatish Balay a->N = mat->N = oldmat->N; 22160ac07820SSatish Balay 22170ac07820SSatish Balay a->bs = oldmat->bs; 22180ac07820SSatish Balay a->bs2 = oldmat->bs2; 22190ac07820SSatish Balay a->mbs = oldmat->mbs; 22200ac07820SSatish Balay a->nbs = oldmat->nbs; 22210ac07820SSatish Balay a->Mbs = oldmat->Mbs; 22220ac07820SSatish Balay a->Nbs = oldmat->Nbs; 22230ac07820SSatish Balay 22240ac07820SSatish Balay a->rstart = oldmat->rstart; 22250ac07820SSatish Balay a->rend = oldmat->rend; 22260ac07820SSatish Balay a->cstart = oldmat->cstart; 22270ac07820SSatish Balay a->cend = oldmat->cend; 22280ac07820SSatish Balay a->size = oldmat->size; 22290ac07820SSatish Balay a->rank = oldmat->rank; 2230aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2231aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2232aef5e8e0SSatish Balay a->rowindices = 0; 22330ac07820SSatish Balay a->rowvalues = 0; 22340ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 223530793edcSSatish Balay a->barray = 0; 22363123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 22373123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 22383123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 22393123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 22400ac07820SSatish Balay 2241133cdb44SSatish Balay /* hash table stuff */ 2242133cdb44SSatish Balay a->ht = 0; 2243133cdb44SSatish Balay a->hd = 0; 2244133cdb44SSatish Balay a->ht_size = 0; 2245133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 224625fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2247133cdb44SSatish Balay a->ht_total_ct = 0; 2248133cdb44SSatish Balay a->ht_insert_ct = 0; 2249133cdb44SSatish Balay 2250133cdb44SSatish Balay 2251ff2fd236SBarry Smith a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2252ff2fd236SBarry Smith PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 22530ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 2254ff2fd236SBarry Smith a->rowners_bs = a->cowners + a->size + 2; 2255549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 22568798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 22578798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 22580ac07820SSatish Balay if (oldmat->colmap) { 2259aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 22600f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 226148e59246SSatish Balay #else 22620ac07820SSatish Balay a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 22630ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2264549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 226548e59246SSatish Balay #endif 22660ac07820SSatish Balay } else a->colmap = 0; 22670ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 22680ac07820SSatish Balay a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 22690ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 2270549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 22710ac07820SSatish Balay } else a->garray = 0; 22720ac07820SSatish Balay 22730ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 22740ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 22750ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2276e18c124aSSatish Balay 22770ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 22782e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 22790ac07820SSatish Balay PLogObjectParent(mat,a->A); 22802e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 22810ac07820SSatish Balay PLogObjectParent(mat,a->B); 22820ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2283e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 22840ac07820SSatish Balay if (flg) { 22850ac07820SSatish Balay ierr = MatPrintHelp(mat);CHKERRQ(ierr); 22860ac07820SSatish Balay } 22870ac07820SSatish Balay *newmat = mat; 22883a40ed3dSBarry Smith PetscFunctionReturn(0); 22890ac07820SSatish Balay } 229057b952d6SSatish Balay 229157b952d6SSatish Balay #include "sys.h" 229257b952d6SSatish Balay 22935615d1e5SSatish Balay #undef __FUNC__ 2294b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 229557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 229657b952d6SSatish Balay { 229757b952d6SSatish Balay Mat A; 229857b952d6SSatish Balay int i,nz,ierr,j,rstart,rend,fd; 229957b952d6SSatish Balay Scalar *vals,*buf; 230057b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 230157b952d6SSatish Balay MPI_Status status; 2302cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 230357b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2304f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 230557b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 230657b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 230757b952d6SSatish Balay 2308d64ed03dSBarry Smith PetscFunctionBegin; 2309f1af5d2fSBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 231057b952d6SSatish Balay 2311d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2312d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 231357b952d6SSatish Balay if (!rank) { 231457b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2315e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2316a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2317d64ed03dSBarry Smith if (header[3] < 0) { 2318a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2319d64ed03dSBarry Smith } 23206c5fab8fSBarry Smith } 2321d64ed03dSBarry Smith 2322ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 232357b952d6SSatish Balay M = header[1]; N = header[2]; 232457b952d6SSatish Balay 2325a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 232657b952d6SSatish Balay 232757b952d6SSatish Balay /* 232857b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 232957b952d6SSatish Balay divisible by the blocksize 233057b952d6SSatish Balay */ 233157b952d6SSatish Balay Mbs = M/bs; 233257b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 233357b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 233457b952d6SSatish Balay else Mbs++; 233557b952d6SSatish Balay if (extra_rows &&!rank) { 2336b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 233757b952d6SSatish Balay } 2338537820f0SBarry Smith 233957b952d6SSatish Balay /* determine ownership of all rows */ 234057b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 234157b952d6SSatish Balay m = mbs * bs; 2342cee3aa6bSSatish Balay rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2343cee3aa6bSSatish Balay browners = rowners + size + 1; 2344ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 234557b952d6SSatish Balay rowners[0] = 0; 2346cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2347cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 234857b952d6SSatish Balay rstart = rowners[rank]; 234957b952d6SSatish Balay rend = rowners[rank+1]; 235057b952d6SSatish Balay 235157b952d6SSatish Balay /* distribute row lengths to all processors */ 235257b952d6SSatish Balay locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 235357b952d6SSatish Balay if (!rank) { 235457b952d6SSatish Balay rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2355e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 235657b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 235757b952d6SSatish Balay sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2358cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2359ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2360606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2361d64ed03dSBarry Smith } else { 2362ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 236357b952d6SSatish Balay } 236457b952d6SSatish Balay 236557b952d6SSatish Balay if (!rank) { 236657b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 236757b952d6SSatish Balay procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2368549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 236957b952d6SSatish Balay for (i=0; i<size; i++) { 237057b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 237157b952d6SSatish Balay procsnz[i] += rowlengths[j]; 237257b952d6SSatish Balay } 237357b952d6SSatish Balay } 2374606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 237557b952d6SSatish Balay 237657b952d6SSatish Balay /* determine max buffer needed and allocate it */ 237757b952d6SSatish Balay maxnz = 0; 237857b952d6SSatish Balay for (i=0; i<size; i++) { 237957b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 238057b952d6SSatish Balay } 238157b952d6SSatish Balay cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 238257b952d6SSatish Balay 238357b952d6SSatish Balay /* read in my part of the matrix column indices */ 238457b952d6SSatish Balay nz = procsnz[0]; 238557b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 238657b952d6SSatish Balay mycols = ibuf; 2387cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2388e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2389cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2390cee3aa6bSSatish Balay 239157b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 239257b952d6SSatish Balay for (i=1; i<size-1; i++) { 239357b952d6SSatish Balay nz = procsnz[i]; 2394e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2395ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 239657b952d6SSatish Balay } 239757b952d6SSatish Balay /* read in the stuff for the last proc */ 239857b952d6SSatish Balay if (size != 1) { 239957b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2400e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 240157b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2402ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 240357b952d6SSatish Balay } 2404606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2405d64ed03dSBarry Smith } else { 240657b952d6SSatish Balay /* determine buffer space needed for message */ 240757b952d6SSatish Balay nz = 0; 240857b952d6SSatish Balay for (i=0; i<m; i++) { 240957b952d6SSatish Balay nz += locrowlens[i]; 241057b952d6SSatish Balay } 241157b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 241257b952d6SSatish Balay mycols = ibuf; 241357b952d6SSatish Balay /* receive message of column indices*/ 2414ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2415ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2416a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 241757b952d6SSatish Balay } 241857b952d6SSatish Balay 241957b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2420cee3aa6bSSatish Balay dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2421cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 242257b952d6SSatish Balay mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2423549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 242457b952d6SSatish Balay masked1 = mask + Mbs; 242557b952d6SSatish Balay masked2 = masked1 + Mbs; 242657b952d6SSatish Balay rowcount = 0; nzcount = 0; 242757b952d6SSatish Balay for (i=0; i<mbs; i++) { 242857b952d6SSatish Balay dcount = 0; 242957b952d6SSatish Balay odcount = 0; 243057b952d6SSatish Balay for (j=0; j<bs; j++) { 243157b952d6SSatish Balay kmax = locrowlens[rowcount]; 243257b952d6SSatish Balay for (k=0; k<kmax; k++) { 243357b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 243457b952d6SSatish Balay if (!mask[tmp]) { 243557b952d6SSatish Balay mask[tmp] = 1; 243657b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 243757b952d6SSatish Balay else masked1[dcount++] = tmp; 243857b952d6SSatish Balay } 243957b952d6SSatish Balay } 244057b952d6SSatish Balay rowcount++; 244157b952d6SSatish Balay } 2442cee3aa6bSSatish Balay 244357b952d6SSatish Balay dlens[i] = dcount; 244457b952d6SSatish Balay odlens[i] = odcount; 2445cee3aa6bSSatish Balay 244657b952d6SSatish Balay /* zero out the mask elements we set */ 244757b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 244857b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 244957b952d6SSatish Balay } 2450cee3aa6bSSatish Balay 245157b952d6SSatish Balay /* create our matrix */ 2452549d3d68SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 245357b952d6SSatish Balay A = *newmat; 24546d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 245557b952d6SSatish Balay 245657b952d6SSatish Balay if (!rank) { 245757b952d6SSatish Balay buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 245857b952d6SSatish Balay /* read in my part of the matrix numerical values */ 245957b952d6SSatish Balay nz = procsnz[0]; 246057b952d6SSatish Balay vals = buf; 2461cee3aa6bSSatish Balay mycols = ibuf; 2462cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2463e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2464cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2465537820f0SBarry Smith 246657b952d6SSatish Balay /* insert into matrix */ 246757b952d6SSatish Balay jj = rstart*bs; 246857b952d6SSatish Balay for (i=0; i<m; i++) { 24693eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 247057b952d6SSatish Balay mycols += locrowlens[i]; 247157b952d6SSatish Balay vals += locrowlens[i]; 247257b952d6SSatish Balay jj++; 247357b952d6SSatish Balay } 247457b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 247557b952d6SSatish Balay for (i=1; i<size-1; i++) { 247657b952d6SSatish Balay nz = procsnz[i]; 247757b952d6SSatish Balay vals = buf; 2478e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2479ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 248057b952d6SSatish Balay } 248157b952d6SSatish Balay /* the last proc */ 248257b952d6SSatish Balay if (size != 1){ 248357b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2484cee3aa6bSSatish Balay vals = buf; 2485e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 248657b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2487ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 248857b952d6SSatish Balay } 2489606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2490d64ed03dSBarry Smith } else { 249157b952d6SSatish Balay /* receive numeric values */ 249257b952d6SSatish Balay buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 249357b952d6SSatish Balay 249457b952d6SSatish Balay /* receive message of values*/ 249557b952d6SSatish Balay vals = buf; 2496cee3aa6bSSatish Balay mycols = ibuf; 2497ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2498ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2499a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 250057b952d6SSatish Balay 250157b952d6SSatish Balay /* insert into matrix */ 250257b952d6SSatish Balay jj = rstart*bs; 2503cee3aa6bSSatish Balay for (i=0; i<m; i++) { 25043eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 250557b952d6SSatish Balay mycols += locrowlens[i]; 250657b952d6SSatish Balay vals += locrowlens[i]; 250757b952d6SSatish Balay jj++; 250857b952d6SSatish Balay } 250957b952d6SSatish Balay } 2510606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2511606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2512606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2513606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2514606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2515606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 25166d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25176d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25183a40ed3dSBarry Smith PetscFunctionReturn(0); 251957b952d6SSatish Balay } 252057b952d6SSatish Balay 2521133cdb44SSatish Balay #undef __FUNC__ 2522b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2523133cdb44SSatish Balay /*@ 2524133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2525133cdb44SSatish Balay 2526133cdb44SSatish Balay Input Parameters: 2527133cdb44SSatish Balay . mat - the matrix 2528133cdb44SSatish Balay . fact - factor 2529133cdb44SSatish Balay 2530fee21e36SBarry Smith Collective on Mat 2531fee21e36SBarry Smith 25328c890885SBarry Smith Level: advanced 25338c890885SBarry Smith 2534133cdb44SSatish Balay Notes: 2535133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2536133cdb44SSatish Balay 2537133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2538133cdb44SSatish Balay 2539133cdb44SSatish Balay .seealso: MatSetOption() 2540133cdb44SSatish Balay @*/ 2541329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2542133cdb44SSatish Balay { 254325fdafccSSatish Balay Mat_MPIBAIJ *baij; 2544133cdb44SSatish Balay 2545133cdb44SSatish Balay PetscFunctionBegin; 2546133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 254725fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2548133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2549133cdb44SSatish Balay } 2550133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2551133cdb44SSatish Balay baij->ht_fact = fact; 2552133cdb44SSatish Balay PetscFunctionReturn(0); 2553133cdb44SSatish Balay } 2554