1*6fa18ffdSBarry Smith /*$Id: mpibaij.c,v 1.192 2000/04/12 04:23:40 bsmith Exp bsmith $*/ 279bdfe76SSatish Balay 377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 579bdfe76SSatish Balay 657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat); 1693fea6afSBarry Smith extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*); 1793fea6afSBarry Smith 1893fea6afSBarry Smith /* UGLY, ugly, ugly 1993fea6afSBarry Smith When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 2093fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2193fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2293fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2393fea6afSBarry Smith into the single precision data structures. 2493fea6afSBarry Smith */ 2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 26*6fa18ffdSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 2793fea6afSBarry Smith extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 2854750694SBarry Smith extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29*6fa18ffdSBarry Smith extern int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30*6fa18ffdSBarry Smith extern int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 3193fea6afSBarry Smith #else 32*6fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 35*6fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 36*6fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3793fea6afSBarry Smith #endif 383b2fbd54SBarry Smith 397fc3c18eSBarry Smith EXTERN_C_BEGIN 407fc3c18eSBarry Smith #undef __FUNC__ 41*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 427fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat) 437fc3c18eSBarry Smith { 447fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 457fc3c18eSBarry Smith int ierr; 467fc3c18eSBarry Smith 477fc3c18eSBarry Smith PetscFunctionBegin; 487fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 497fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 507fc3c18eSBarry Smith PetscFunctionReturn(0); 517fc3c18eSBarry Smith } 527fc3c18eSBarry Smith EXTERN_C_END 537fc3c18eSBarry Smith 547fc3c18eSBarry Smith EXTERN_C_BEGIN 557fc3c18eSBarry Smith #undef __FUNC__ 56*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 577fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat) 587fc3c18eSBarry Smith { 597fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 607fc3c18eSBarry Smith int ierr; 617fc3c18eSBarry Smith 627fc3c18eSBarry Smith PetscFunctionBegin; 637fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 647fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 657fc3c18eSBarry Smith PetscFunctionReturn(0); 667fc3c18eSBarry Smith } 677fc3c18eSBarry Smith EXTERN_C_END 687fc3c18eSBarry Smith 69537820f0SBarry Smith /* 70537820f0SBarry Smith Local utility routine that creates a mapping from the global column 7157b952d6SSatish Balay number to the local number in the off-diagonal part of the local 7257b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 7357b952d6SSatish Balay length of colmap equals the global matrix length. 7457b952d6SSatish Balay */ 755615d1e5SSatish Balay #undef __FUNC__ 76*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private" 7757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 7857b952d6SSatish Balay { 7957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 8057b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 81dc2900e9SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 8257b952d6SSatish Balay 83d64ed03dSBarry Smith PetscFunctionBegin; 84aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 850f5bd95cSBarry Smith ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 8648e59246SSatish Balay for (i=0; i<nbs; i++){ 870f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 8848e59246SSatish Balay } 8948e59246SSatish Balay #else 90758f045eSSatish Balay baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 9157b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 92549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 93928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 9448e59246SSatish Balay #endif 953a40ed3dSBarry Smith PetscFunctionReturn(0); 9657b952d6SSatish Balay } 9757b952d6SSatish Balay 9880c1aa95SSatish Balay #define CHUNKSIZE 10 9980c1aa95SSatish Balay 100f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 10180c1aa95SSatish Balay { \ 10280c1aa95SSatish Balay \ 10380c1aa95SSatish Balay brow = row/bs; \ 10480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 10680c1aa95SSatish Balay bcol = col/bs; \ 10780c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 108ab26458aSBarry Smith low = 0; high = nrow; \ 109ab26458aSBarry Smith while (high-low > 3) { \ 110ab26458aSBarry Smith t = (low+high)/2; \ 111ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 112ab26458aSBarry Smith else low = t; \ 113ab26458aSBarry Smith } \ 114ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11580c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 11680c1aa95SSatish Balay if (rp[_i] == bcol) { \ 11780c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 118eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 119eada6651SSatish Balay else *bap = value; \ 120ac7a638eSSatish Balay goto a_noinsert; \ 12180c1aa95SSatish Balay } \ 12280c1aa95SSatish Balay } \ 12389280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 124a8c6a408SBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 12580c1aa95SSatish Balay if (nrow >= rmax) { \ 12680c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 12780c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1283eda8832SBarry Smith MatScalar *new_a; \ 12980c1aa95SSatish Balay \ 130a8c6a408SBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 13189280ab3SLois Curfman McInnes \ 13280c1aa95SSatish Balay /* malloc new storage space */ \ 1333eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 1343eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 13580c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 13680c1aa95SSatish Balay new_i = new_j + new_nz; \ 13780c1aa95SSatish Balay \ 13880c1aa95SSatish Balay /* copy over old data into new slots */ \ 13980c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 14080c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 141549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 14280c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1433eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1443eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 145549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 146549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1473eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 14880c1aa95SSatish Balay /* free up old matrix storage */ \ 149606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 150606d414cSSatish Balay if (!a->singlemalloc) { \ 151606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 152606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 153606d414cSSatish Balay } \ 15480c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1557c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 15680c1aa95SSatish Balay \ 15780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 158ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 1593eda8832SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 16080c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 16180c1aa95SSatish Balay a->reallocs++; \ 16280c1aa95SSatish Balay a->nz++; \ 16380c1aa95SSatish Balay } \ 16480c1aa95SSatish Balay N = nrow++ - 1; \ 16580c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 16680c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 16780c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1683eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16980c1aa95SSatish Balay } \ 1703eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 17180c1aa95SSatish Balay rp[_i] = bcol; \ 17280c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 173ac7a638eSSatish Balay a_noinsert:; \ 17480c1aa95SSatish Balay ailen[brow] = nrow; \ 17580c1aa95SSatish Balay } 17657b952d6SSatish Balay 177ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 178ac7a638eSSatish Balay { \ 179ac7a638eSSatish Balay brow = row/bs; \ 180ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 181ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 182ac7a638eSSatish Balay bcol = col/bs; \ 183ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 184ac7a638eSSatish Balay low = 0; high = nrow; \ 185ac7a638eSSatish Balay while (high-low > 3) { \ 186ac7a638eSSatish Balay t = (low+high)/2; \ 187ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 188ac7a638eSSatish Balay else low = t; \ 189ac7a638eSSatish Balay } \ 190ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 191ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 192ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 193ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 194ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 195ac7a638eSSatish Balay else *bap = value; \ 196ac7a638eSSatish Balay goto b_noinsert; \ 197ac7a638eSSatish Balay } \ 198ac7a638eSSatish Balay } \ 19989280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 200a8c6a408SBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 201ac7a638eSSatish Balay if (nrow >= rmax) { \ 202ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 20374c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2043eda8832SBarry Smith MatScalar *new_a; \ 205ac7a638eSSatish Balay \ 206a8c6a408SBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 20789280ab3SLois Curfman McInnes \ 208ac7a638eSSatish Balay /* malloc new storage space */ \ 2093eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 2103eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 211ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 212ac7a638eSSatish Balay new_i = new_j + new_nz; \ 213ac7a638eSSatish Balay \ 214ac7a638eSSatish Balay /* copy over old data into new slots */ \ 215ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 21674c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 217549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 218ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2193eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2203eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2213eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 222549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2233eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 224ac7a638eSSatish Balay /* free up old matrix storage */ \ 225606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 226606d414cSSatish Balay if (!b->singlemalloc) { \ 227606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 228606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 229606d414cSSatish Balay } \ 23074c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2317c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 232ac7a638eSSatish Balay \ 233ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 234ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 2353eda8832SBarry Smith PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 23674c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 23774c639caSSatish Balay b->reallocs++; \ 23874c639caSSatish Balay b->nz++; \ 239ac7a638eSSatish Balay } \ 240ac7a638eSSatish Balay N = nrow++ - 1; \ 241ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 242ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 243ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2443eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 245ac7a638eSSatish Balay } \ 2463eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 247ac7a638eSSatish Balay rp[_i] = bcol; \ 248ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 249ac7a638eSSatish Balay b_noinsert:; \ 250ac7a638eSSatish Balay bilen[brow] = nrow; \ 251ac7a638eSSatish Balay } 252ac7a638eSSatish Balay 25393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2545615d1e5SSatish Balay #undef __FUNC__ 255*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 256ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 25757b952d6SSatish Balay { 258*6fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 25993fea6afSBarry Smith int ierr,i,N = m*n; 260*6fa18ffdSBarry Smith MatScalar *vsingle; 26193fea6afSBarry Smith 26293fea6afSBarry Smith PetscFunctionBegin; 263*6fa18ffdSBarry Smith if (N > b->setvalueslen) { 264*6fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 265*6fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 266*6fa18ffdSBarry Smith b->setvalueslen = N; 267*6fa18ffdSBarry Smith } 268*6fa18ffdSBarry Smith vsingle = b->setvaluescopy; 269*6fa18ffdSBarry Smith 27093fea6afSBarry Smith for (i=0; i<N; i++) { 27193fea6afSBarry Smith vsingle[i] = v[i]; 27293fea6afSBarry Smith } 27393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 27493fea6afSBarry Smith PetscFunctionReturn(0); 27593fea6afSBarry Smith } 27693fea6afSBarry Smith 27793fea6afSBarry Smith #undef __FUNC__ 278*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 27993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 28093fea6afSBarry Smith { 281*6fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 282*6fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 283*6fa18ffdSBarry Smith MatScalar *vsingle; 28493fea6afSBarry Smith 28593fea6afSBarry Smith PetscFunctionBegin; 286*6fa18ffdSBarry Smith if (N > b->setvalueslen) { 287*6fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 288*6fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 289*6fa18ffdSBarry Smith b->setvalueslen = N; 290*6fa18ffdSBarry Smith } 291*6fa18ffdSBarry Smith vsingle = b->setvaluescopy; 29293fea6afSBarry Smith for (i=0; i<N; i++) { 29393fea6afSBarry Smith vsingle[i] = v[i]; 29493fea6afSBarry Smith } 29593fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 29693fea6afSBarry Smith PetscFunctionReturn(0); 29793fea6afSBarry Smith } 298*6fa18ffdSBarry Smith 299*6fa18ffdSBarry Smith #undef __FUNC__ 300*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT" 301*6fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 302*6fa18ffdSBarry Smith { 303*6fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 304*6fa18ffdSBarry Smith int ierr,i,N = m*n; 305*6fa18ffdSBarry Smith MatScalar *vsingle; 306*6fa18ffdSBarry Smith 307*6fa18ffdSBarry Smith PetscFunctionBegin; 308*6fa18ffdSBarry Smith if (N > b->setvalueslen) { 309*6fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 310*6fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 311*6fa18ffdSBarry Smith b->setvalueslen = N; 312*6fa18ffdSBarry Smith } 313*6fa18ffdSBarry Smith vsingle = b->setvaluescopy; 314*6fa18ffdSBarry Smith for (i=0; i<N; i++) { 315*6fa18ffdSBarry Smith vsingle[i] = v[i]; 316*6fa18ffdSBarry Smith } 317*6fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 318*6fa18ffdSBarry Smith PetscFunctionReturn(0); 319*6fa18ffdSBarry Smith } 320*6fa18ffdSBarry Smith 321*6fa18ffdSBarry Smith #undef __FUNC__ 322*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 323*6fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 324*6fa18ffdSBarry Smith { 325*6fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 326*6fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 327*6fa18ffdSBarry Smith MatScalar *vsingle; 328*6fa18ffdSBarry Smith 329*6fa18ffdSBarry Smith PetscFunctionBegin; 330*6fa18ffdSBarry Smith if (N > b->setvalueslen) { 331*6fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 332*6fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 333*6fa18ffdSBarry Smith b->setvalueslen = N; 334*6fa18ffdSBarry Smith } 335*6fa18ffdSBarry Smith vsingle = b->setvaluescopy; 336*6fa18ffdSBarry Smith for (i=0; i<N; i++) { 337*6fa18ffdSBarry Smith vsingle[i] = v[i]; 338*6fa18ffdSBarry Smith } 339*6fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 340*6fa18ffdSBarry Smith PetscFunctionReturn(0); 341*6fa18ffdSBarry Smith } 34293fea6afSBarry Smith #endif 34393fea6afSBarry Smith 34493fea6afSBarry Smith #undef __FUNC__ 345*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 34693fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 34793fea6afSBarry Smith { 34857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 34993fea6afSBarry Smith MatScalar value; 3504fa0d573SSatish Balay int ierr,i,j,row,col; 3514fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 3524fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 3534fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 35457b952d6SSatish Balay 355eada6651SSatish Balay /* Some Variables required in the macro */ 35680c1aa95SSatish Balay Mat A = baij->A; 35780c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 358ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3593eda8832SBarry Smith MatScalar *aa=a->a; 360ac7a638eSSatish Balay 361ac7a638eSSatish Balay Mat B = baij->B; 362ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 363ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3643eda8832SBarry Smith MatScalar *ba=b->a; 365ac7a638eSSatish Balay 366ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 367ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 3683eda8832SBarry Smith MatScalar *ap,*bap; 36980c1aa95SSatish Balay 370d64ed03dSBarry Smith PetscFunctionBegin; 37157b952d6SSatish Balay for (i=0; i<m; i++) { 3725ef9f2a5SBarry Smith if (im[i] < 0) continue; 373aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 374a8c6a408SBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 375639f9d9dSBarry Smith #endif 37657b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 37757b952d6SSatish Balay row = im[i] - rstart_orig; 37857b952d6SSatish Balay for (j=0; j<n; j++) { 37957b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 38057b952d6SSatish Balay col = in[j] - cstart_orig; 38157b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 382f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 38380c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 38473959e64SBarry Smith } else if (in[j] < 0) continue; 385aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 386a8c6a408SBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 387639f9d9dSBarry Smith #endif 38857b952d6SSatish Balay else { 38957b952d6SSatish Balay if (mat->was_assembled) { 390905e6a2fSBarry Smith if (!baij->colmap) { 391905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 392905e6a2fSBarry Smith } 393aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3940f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 395fa46199cSSatish Balay col = col - 1 + in[j]%bs; 39648e59246SSatish Balay #else 397905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 39848e59246SSatish Balay #endif 39957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 40057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 40157b952d6SSatish Balay col = in[j]; 4029bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 4039bf004c3SSatish Balay B = baij->B; 4049bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 4059bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 4069bf004c3SSatish Balay ba=b->a; 40757b952d6SSatish Balay } 408d64ed03dSBarry Smith } else col = in[j]; 40957b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 411ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 41257b952d6SSatish Balay } 41357b952d6SSatish Balay } 414d64ed03dSBarry Smith } else { 41590f02eecSBarry Smith if (!baij->donotstash) { 416ff2fd236SBarry Smith if (roworiented) { 417*6fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 418ff2fd236SBarry Smith } else { 419*6fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 42057b952d6SSatish Balay } 42157b952d6SSatish Balay } 42257b952d6SSatish Balay } 42390f02eecSBarry Smith } 4243a40ed3dSBarry Smith PetscFunctionReturn(0); 42557b952d6SSatish Balay } 42657b952d6SSatish Balay 427ab26458aSBarry Smith #undef __FUNC__ 428*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 42993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 430ab26458aSBarry Smith { 431ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 43293fea6afSBarry Smith MatScalar *value,*barray=baij->barray; 4337ef1d9bdSSatish Balay int ierr,i,j,ii,jj,row,col; 434ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 435ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 436ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 437ab26458aSBarry Smith 438b16ae2b1SBarry Smith PetscFunctionBegin; 43930793edcSSatish Balay if(!barray) { 44093fea6afSBarry Smith baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 44130793edcSSatish Balay } 44230793edcSSatish Balay 443ab26458aSBarry Smith if (roworiented) { 444ab26458aSBarry Smith stepval = (n-1)*bs; 445ab26458aSBarry Smith } else { 446ab26458aSBarry Smith stepval = (m-1)*bs; 447ab26458aSBarry Smith } 448ab26458aSBarry Smith for (i=0; i<m; i++) { 4495ef9f2a5SBarry Smith if (im[i] < 0) continue; 450aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4515ef9f2a5SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 452ab26458aSBarry Smith #endif 453ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 454ab26458aSBarry Smith row = im[i] - rstart; 455ab26458aSBarry Smith for (j=0; j<n; j++) { 45615b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 45715b57d14SSatish Balay if ((roworiented) && (n == 1)) { 45815b57d14SSatish Balay barray = v + i*bs2; 45915b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 46015b57d14SSatish Balay barray = v + j*bs2; 46115b57d14SSatish Balay } else { /* Here a copy is required */ 462ab26458aSBarry Smith if (roworiented) { 463ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 464ab26458aSBarry Smith } else { 465ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 466abef11f7SSatish Balay } 46747513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 46847513183SBarry Smith for (jj=0; jj<bs; jj++) { 46930793edcSSatish Balay *barray++ = *value++; 47047513183SBarry Smith } 47147513183SBarry Smith } 47230793edcSSatish Balay barray -=bs2; 47315b57d14SSatish Balay } 474abef11f7SSatish Balay 475abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 476abef11f7SSatish Balay col = in[j] - cstart; 47793fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 478ab26458aSBarry Smith } 4795ef9f2a5SBarry Smith else if (in[j] < 0) continue; 480aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 4815ef9f2a5SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 482ab26458aSBarry Smith #endif 483ab26458aSBarry Smith else { 484ab26458aSBarry Smith if (mat->was_assembled) { 485ab26458aSBarry Smith if (!baij->colmap) { 486ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 487ab26458aSBarry Smith } 488a5eb4965SSatish Balay 489aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 490aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 491fa46199cSSatish Balay { int data; 4920f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4930f5bd95cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 494fa46199cSSatish Balay } 49548e59246SSatish Balay #else 4960f5bd95cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 497a5eb4965SSatish Balay #endif 49848e59246SSatish Balay #endif 499aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 5000f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 501fa46199cSSatish Balay col = (col - 1)/bs; 50248e59246SSatish Balay #else 503a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 50448e59246SSatish Balay #endif 505ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 506ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 507ab26458aSBarry Smith col = in[j]; 508ab26458aSBarry Smith } 509ab26458aSBarry Smith } 510ab26458aSBarry Smith else col = in[j]; 51193fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 512ab26458aSBarry Smith } 513ab26458aSBarry Smith } 514d64ed03dSBarry Smith } else { 515ab26458aSBarry Smith if (!baij->donotstash) { 516ff2fd236SBarry Smith if (roworiented) { 517*6fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 518ff2fd236SBarry Smith } else { 519*6fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 520ff2fd236SBarry Smith } 521abef11f7SSatish Balay } 522ab26458aSBarry Smith } 523ab26458aSBarry Smith } 5243a40ed3dSBarry Smith PetscFunctionReturn(0); 525ab26458aSBarry Smith } 526*6fa18ffdSBarry Smith 5270bdbc534SSatish Balay #define HASH_KEY 0.6180339887 528c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 529*6fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 530c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 5315615d1e5SSatish Balay #undef __FUNC__ 532*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar" 533*6fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 5340bdbc534SSatish Balay { 5350bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 5360bdbc534SSatish Balay int ierr,i,j,row,col; 5370bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 538c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 539c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 540329f5518SBarry Smith PetscReal tmp; 5413eda8832SBarry Smith MatScalar **HD = baij->hd,value; 542aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5434a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5444a15367fSSatish Balay #endif 5450bdbc534SSatish Balay 5460bdbc534SSatish Balay PetscFunctionBegin; 5470bdbc534SSatish Balay 5480bdbc534SSatish Balay for (i=0; i<m; i++) { 549aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5500bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 5510bdbc534SSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 5520bdbc534SSatish Balay #endif 5530bdbc534SSatish Balay row = im[i]; 554c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5550bdbc534SSatish Balay for (j=0; j<n; j++) { 5560bdbc534SSatish Balay col = in[j]; 557*6fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5580bdbc534SSatish Balay /* Look up into the Hash Table */ 559c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 560c2760754SSatish Balay h1 = HASH(size,key,tmp); 5610bdbc534SSatish Balay 562c2760754SSatish Balay 563c2760754SSatish Balay idx = h1; 564aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 565187ce0cbSSatish Balay insert_ct++; 566187ce0cbSSatish Balay total_ct++; 567187ce0cbSSatish Balay if (HT[idx] != key) { 568187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 569187ce0cbSSatish Balay if (idx == size) { 570187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 571187ce0cbSSatish Balay if (idx == h1) { 572187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 573187ce0cbSSatish Balay } 574187ce0cbSSatish Balay } 575187ce0cbSSatish Balay } 576187ce0cbSSatish Balay #else 577c2760754SSatish Balay if (HT[idx] != key) { 578c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 579c2760754SSatish Balay if (idx == size) { 580c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 581c2760754SSatish Balay if (idx == h1) { 582c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 583c2760754SSatish Balay } 584c2760754SSatish Balay } 585c2760754SSatish Balay } 586187ce0cbSSatish Balay #endif 587c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 588c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 589c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5900bdbc534SSatish Balay } 5910bdbc534SSatish Balay } else { 5920bdbc534SSatish Balay if (!baij->donotstash) { 593ff2fd236SBarry Smith if (roworiented) { 5948798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 595ff2fd236SBarry Smith } else { 5968798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5970bdbc534SSatish Balay } 5980bdbc534SSatish Balay } 5990bdbc534SSatish Balay } 6000bdbc534SSatish Balay } 601aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 602187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 603187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 604187ce0cbSSatish Balay #endif 6050bdbc534SSatish Balay PetscFunctionReturn(0); 6060bdbc534SSatish Balay } 6070bdbc534SSatish Balay 6080bdbc534SSatish Balay #undef __FUNC__ 609*6fa18ffdSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 610*6fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 6110bdbc534SSatish Balay { 6120bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6138798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 6140bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 615b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 616c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 617329f5518SBarry Smith PetscReal tmp; 6183eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 619*6fa18ffdSBarry Smith MatScalar *v_t,*value; 620aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 6214a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 6224a15367fSSatish Balay #endif 6230bdbc534SSatish Balay 624d0a41580SSatish Balay PetscFunctionBegin; 625d0a41580SSatish Balay 6260bdbc534SSatish Balay if (roworiented) { 6270bdbc534SSatish Balay stepval = (n-1)*bs; 6280bdbc534SSatish Balay } else { 6290bdbc534SSatish Balay stepval = (m-1)*bs; 6300bdbc534SSatish Balay } 6310bdbc534SSatish Balay for (i=0; i<m; i++) { 632aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 6330bdbc534SSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 6340bdbc534SSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 6350bdbc534SSatish Balay #endif 6360bdbc534SSatish Balay row = im[i]; 637187ce0cbSSatish Balay v_t = v + i*bs2; 638c2760754SSatish Balay if (row >= rstart && row < rend) { 6390bdbc534SSatish Balay for (j=0; j<n; j++) { 6400bdbc534SSatish Balay col = in[j]; 6410bdbc534SSatish Balay 6420bdbc534SSatish Balay /* Look up into the Hash Table */ 643c2760754SSatish Balay key = row*Nbs+col+1; 644c2760754SSatish Balay h1 = HASH(size,key,tmp); 6450bdbc534SSatish Balay 646c2760754SSatish Balay idx = h1; 647aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 648187ce0cbSSatish Balay total_ct++; 649187ce0cbSSatish Balay insert_ct++; 650187ce0cbSSatish Balay if (HT[idx] != key) { 651187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 652187ce0cbSSatish Balay if (idx == size) { 653187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 654187ce0cbSSatish Balay if (idx == h1) { 655187ce0cbSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 656187ce0cbSSatish Balay } 657187ce0cbSSatish Balay } 658187ce0cbSSatish Balay } 659187ce0cbSSatish Balay #else 660c2760754SSatish Balay if (HT[idx] != key) { 661c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 662c2760754SSatish Balay if (idx == size) { 663c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 664c2760754SSatish Balay if (idx == h1) { 665c2760754SSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 666c2760754SSatish Balay } 667c2760754SSatish Balay } 668c2760754SSatish Balay } 669187ce0cbSSatish Balay #endif 670c2760754SSatish Balay baij_a = HD[idx]; 6710bdbc534SSatish Balay if (roworiented) { 672c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 673187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 674187ce0cbSSatish Balay value = v_t; 675187ce0cbSSatish Balay v_t += bs; 676fef45726SSatish Balay if (addv == ADD_VALUES) { 677c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 678c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 679fef45726SSatish Balay baij_a[jj] += *value++; 680b4cc0f5aSSatish Balay } 681b4cc0f5aSSatish Balay } 682fef45726SSatish Balay } else { 683c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 684c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 685fef45726SSatish Balay baij_a[jj] = *value++; 686fef45726SSatish Balay } 687fef45726SSatish Balay } 688fef45726SSatish Balay } 6890bdbc534SSatish Balay } else { 6900bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 691fef45726SSatish Balay if (addv == ADD_VALUES) { 692b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6930bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 694fef45726SSatish Balay baij_a[jj] += *value++; 695fef45726SSatish Balay } 696fef45726SSatish Balay } 697fef45726SSatish Balay } else { 698fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 699fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 700fef45726SSatish Balay baij_a[jj] = *value++; 701fef45726SSatish Balay } 702b4cc0f5aSSatish Balay } 7030bdbc534SSatish Balay } 7040bdbc534SSatish Balay } 7050bdbc534SSatish Balay } 7060bdbc534SSatish Balay } else { 7070bdbc534SSatish Balay if (!baij->donotstash) { 7080bdbc534SSatish Balay if (roworiented) { 7098798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7100bdbc534SSatish Balay } else { 7118798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7120bdbc534SSatish Balay } 7130bdbc534SSatish Balay } 7140bdbc534SSatish Balay } 7150bdbc534SSatish Balay } 716aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 717187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 718187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 719187ce0cbSSatish Balay #endif 7200bdbc534SSatish Balay PetscFunctionReturn(0); 7210bdbc534SSatish Balay } 722133cdb44SSatish Balay 7230bdbc534SSatish Balay #undef __FUNC__ 724b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 725ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 726d6de1c52SSatish Balay { 727d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 728d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 72948e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 730d6de1c52SSatish Balay 731133cdb44SSatish Balay PetscFunctionBegin; 732d6de1c52SSatish Balay for (i=0; i<m; i++) { 733a8c6a408SBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 734a8c6a408SBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 735d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 736d6de1c52SSatish Balay row = idxm[i] - bsrstart; 737d6de1c52SSatish Balay for (j=0; j<n; j++) { 738a8c6a408SBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 739a8c6a408SBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 740d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 741d6de1c52SSatish Balay col = idxn[j] - bscstart; 74298dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 743d64ed03dSBarry Smith } else { 744905e6a2fSBarry Smith if (!baij->colmap) { 745905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 746905e6a2fSBarry Smith } 747aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7480f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 749fa46199cSSatish Balay data --; 75048e59246SSatish Balay #else 75148e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 75248e59246SSatish Balay #endif 75348e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 754d9d09a02SSatish Balay else { 75548e59246SSatish Balay col = data + idxn[j]%bs; 75698dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 757d6de1c52SSatish Balay } 758d6de1c52SSatish Balay } 759d6de1c52SSatish Balay } 760d64ed03dSBarry Smith } else { 761a8c6a408SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 762d6de1c52SSatish Balay } 763d6de1c52SSatish Balay } 7643a40ed3dSBarry Smith PetscFunctionReturn(0); 765d6de1c52SSatish Balay } 766d6de1c52SSatish Balay 7675615d1e5SSatish Balay #undef __FUNC__ 768b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 769329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 770d6de1c52SSatish Balay { 771d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 772d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 773acdf5bf4SSatish Balay int ierr,i,bs2=baij->bs2; 774329f5518SBarry Smith PetscReal sum = 0.0; 7753eda8832SBarry Smith MatScalar *v; 776d6de1c52SSatish Balay 777d64ed03dSBarry Smith PetscFunctionBegin; 778d6de1c52SSatish Balay if (baij->size == 1) { 779d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 780d6de1c52SSatish Balay } else { 781d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 782d6de1c52SSatish Balay v = amat->a; 783d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 784aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 785329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 786d6de1c52SSatish Balay #else 787d6de1c52SSatish Balay sum += (*v)*(*v); v++; 788d6de1c52SSatish Balay #endif 789d6de1c52SSatish Balay } 790d6de1c52SSatish Balay v = bmat->a; 791d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 792aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 793329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 794d6de1c52SSatish Balay #else 795d6de1c52SSatish Balay sum += (*v)*(*v); v++; 796d6de1c52SSatish Balay #endif 797d6de1c52SSatish Balay } 798ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 799d6de1c52SSatish Balay *norm = sqrt(*norm); 800d64ed03dSBarry Smith } else { 801e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 802d6de1c52SSatish Balay } 803d64ed03dSBarry Smith } 8043a40ed3dSBarry Smith PetscFunctionReturn(0); 805d6de1c52SSatish Balay } 80657b952d6SSatish Balay 807bd7f49f5SSatish Balay 808fef45726SSatish Balay /* 809fef45726SSatish Balay Creates the hash table, and sets the table 810fef45726SSatish Balay This table is created only once. 811fef45726SSatish Balay If new entried need to be added to the matrix 812fef45726SSatish Balay then the hash table has to be destroyed and 813fef45726SSatish Balay recreated. 814fef45726SSatish Balay */ 815fef45726SSatish Balay #undef __FUNC__ 816b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 817329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 818596b8d2eSBarry Smith { 819596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 820596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 821596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 8220bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 823549d3d68SSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 824187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 825fef45726SSatish Balay int *HT,key; 8263eda8832SBarry Smith MatScalar **HD; 827329f5518SBarry Smith PetscReal tmp; 828aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 8294a15367fSSatish Balay int ct=0,max=0; 8304a15367fSSatish Balay #endif 831fef45726SSatish Balay 832d64ed03dSBarry Smith PetscFunctionBegin; 8330bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8340bdbc534SSatish Balay size = baij->ht_size; 835fef45726SSatish Balay 8360bdbc534SSatish Balay if (baij->ht) { 8370bdbc534SSatish Balay PetscFunctionReturn(0); 838596b8d2eSBarry Smith } 8390bdbc534SSatish Balay 840fef45726SSatish Balay /* Allocate Memory for Hash Table */ 8413eda8832SBarry Smith baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 842b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 843b9e4cc15SSatish Balay HD = baij->hd; 844a07cd24cSSatish Balay HT = baij->ht; 845b9e4cc15SSatish Balay 846b9e4cc15SSatish Balay 847549d3d68SSatish Balay ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 8480bdbc534SSatish Balay 849596b8d2eSBarry Smith 850596b8d2eSBarry Smith /* Loop Over A */ 8510bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 852596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8530bdbc534SSatish Balay row = i+rstart; 8540bdbc534SSatish Balay col = aj[j]+cstart; 855596b8d2eSBarry Smith 856187ce0cbSSatish Balay key = row*Nbs + col + 1; 857c2760754SSatish Balay h1 = HASH(size,key,tmp); 8580bdbc534SSatish Balay for (k=0; k<size; k++){ 8590bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8600bdbc534SSatish Balay HT[(h1+k)%size] = key; 8610bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 862596b8d2eSBarry Smith break; 863aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 864187ce0cbSSatish Balay } else { 865187ce0cbSSatish Balay ct++; 866187ce0cbSSatish Balay #endif 867596b8d2eSBarry Smith } 868187ce0cbSSatish Balay } 869aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 870187ce0cbSSatish Balay if (k> max) max = k; 871187ce0cbSSatish Balay #endif 872596b8d2eSBarry Smith } 873596b8d2eSBarry Smith } 874596b8d2eSBarry Smith /* Loop Over B */ 8750bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 876596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 8770bdbc534SSatish Balay row = i+rstart; 8780bdbc534SSatish Balay col = garray[bj[j]]; 879187ce0cbSSatish Balay key = row*Nbs + col + 1; 880c2760754SSatish Balay h1 = HASH(size,key,tmp); 8810bdbc534SSatish Balay for (k=0; k<size; k++){ 8820bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8830bdbc534SSatish Balay HT[(h1+k)%size] = key; 8840bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 885596b8d2eSBarry Smith break; 886aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 887187ce0cbSSatish Balay } else { 888187ce0cbSSatish Balay ct++; 889187ce0cbSSatish Balay #endif 890596b8d2eSBarry Smith } 891187ce0cbSSatish Balay } 892aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 893187ce0cbSSatish Balay if (k> max) max = k; 894187ce0cbSSatish Balay #endif 895596b8d2eSBarry Smith } 896596b8d2eSBarry Smith } 897596b8d2eSBarry Smith 898596b8d2eSBarry Smith /* Print Summary */ 899aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 900c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 901596b8d2eSBarry Smith if (HT[i]) {j++;} 902c38d4ed2SBarry Smith } 903329f5518SBarry Smith PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 904187ce0cbSSatish Balay #endif 9053a40ed3dSBarry Smith PetscFunctionReturn(0); 906596b8d2eSBarry Smith } 90757b952d6SSatish Balay 908bbb85fb3SSatish Balay #undef __FUNC__ 909b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 910bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 911bbb85fb3SSatish Balay { 912bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 913ff2fd236SBarry Smith int ierr,nstash,reallocs; 914bbb85fb3SSatish Balay InsertMode addv; 915bbb85fb3SSatish Balay 916bbb85fb3SSatish Balay PetscFunctionBegin; 917bbb85fb3SSatish Balay if (baij->donotstash) { 918bbb85fb3SSatish Balay PetscFunctionReturn(0); 919bbb85fb3SSatish Balay } 920bbb85fb3SSatish Balay 921bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 922bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 923bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 924bbb85fb3SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 925bbb85fb3SSatish Balay } 926bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 927bbb85fb3SSatish Balay 9288798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9298798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9308798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 9315a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 9328798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 9335a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 934bbb85fb3SSatish Balay PetscFunctionReturn(0); 935bbb85fb3SSatish Balay } 936bbb85fb3SSatish Balay 937bbb85fb3SSatish Balay #undef __FUNC__ 938b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 939bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 940bbb85fb3SSatish Balay { 941bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 942a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 943a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 9447c922b88SBarry Smith int *row,*col,other_disassembled; 9457c922b88SBarry Smith PetscTruth r1,r2,r3; 9463eda8832SBarry Smith MatScalar *val; 947bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 948bbb85fb3SSatish Balay 949bbb85fb3SSatish Balay PetscFunctionBegin; 950bbb85fb3SSatish Balay if (!baij->donotstash) { 951a2d1c673SSatish Balay while (1) { 9528798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 953a2d1c673SSatish Balay if (!flg) break; 954a2d1c673SSatish Balay 955bbb85fb3SSatish Balay for (i=0; i<n;) { 956bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 957bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 958bbb85fb3SSatish Balay if (j < n) ncols = j-i; 959bbb85fb3SSatish Balay else ncols = n-i; 960bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 96193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 962bbb85fb3SSatish Balay i = j; 963bbb85fb3SSatish Balay } 964bbb85fb3SSatish Balay } 9658798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 966a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 967a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 968a2d1c673SSatish Balay restore the original flags */ 969a2d1c673SSatish Balay r1 = baij->roworiented; 970a2d1c673SSatish Balay r2 = a->roworiented; 971a2d1c673SSatish Balay r3 = b->roworiented; 9727c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 9737c922b88SBarry Smith a->roworiented = PETSC_FALSE; 9747c922b88SBarry Smith b->roworiented = PETSC_FALSE; 975a2d1c673SSatish Balay while (1) { 9768798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 977a2d1c673SSatish Balay if (!flg) break; 978a2d1c673SSatish Balay 979a2d1c673SSatish Balay for (i=0; i<n;) { 980a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 981a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 982a2d1c673SSatish Balay if (j < n) ncols = j-i; 983a2d1c673SSatish Balay else ncols = n-i; 98493fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 985a2d1c673SSatish Balay i = j; 986a2d1c673SSatish Balay } 987a2d1c673SSatish Balay } 9888798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 989a2d1c673SSatish Balay baij->roworiented = r1; 990a2d1c673SSatish Balay a->roworiented = r2; 991a2d1c673SSatish Balay b->roworiented = r3; 992bbb85fb3SSatish Balay } 993bbb85fb3SSatish Balay 994bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 995bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 996bbb85fb3SSatish Balay 997bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 998bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 999bbb85fb3SSatish Balay /* 1000bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1001bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1002bbb85fb3SSatish Balay */ 1003bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1004bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1005bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1006bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1007bbb85fb3SSatish Balay } 1008bbb85fb3SSatish Balay } 1009bbb85fb3SSatish Balay 1010bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1011bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1012bbb85fb3SSatish Balay } 1013bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1014bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1015bbb85fb3SSatish Balay 1016aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1017bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1018c38d4ed2SBarry Smith PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1019bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1020bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1021bbb85fb3SSatish Balay } 1022bbb85fb3SSatish Balay #endif 1023bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1024bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1025bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1026bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1027bbb85fb3SSatish Balay } 1028bbb85fb3SSatish Balay 1029606d414cSSatish Balay if (baij->rowvalues) { 1030606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1031606d414cSSatish Balay baij->rowvalues = 0; 1032606d414cSSatish Balay } 1033bbb85fb3SSatish Balay PetscFunctionReturn(0); 1034bbb85fb3SSatish Balay } 103557b952d6SSatish Balay 10365615d1e5SSatish Balay #undef __FUNC__ 1037b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 10387b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 103957b952d6SSatish Balay { 104057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 104177ed5343SBarry Smith int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 10426831982aSBarry Smith PetscTruth isascii,isdraw; 104357b952d6SSatish Balay 1044d64ed03dSBarry Smith PetscFunctionBegin; 10456831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 10466831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 10470f5bd95cSBarry Smith if (isascii) { 1048d41123aaSBarry Smith ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1049639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 10504e220ebcSLois Curfman McInnes MatInfo info; 1051d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1052d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 10536831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 10544e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10556831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 1056d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 10576831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1058d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 10596831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 10606831982aSBarry Smith ierr = ViewerFlush(viewer);CHKERRQ(ierr); 106157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10623a40ed3dSBarry Smith PetscFunctionReturn(0); 1063d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 10646831982aSBarry Smith ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 10653a40ed3dSBarry Smith PetscFunctionReturn(0); 106657b952d6SSatish Balay } 106757b952d6SSatish Balay } 106857b952d6SSatish Balay 10690f5bd95cSBarry Smith if (isdraw) { 107057b952d6SSatish Balay Draw draw; 107157b952d6SSatish Balay PetscTruth isnull; 107277ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 10733a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 107457b952d6SSatish Balay } 107557b952d6SSatish Balay 107657b952d6SSatish Balay if (size == 1) { 107757b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1078d64ed03dSBarry Smith } else { 107957b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 108057b952d6SSatish Balay Mat A; 108157b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 10823eda8832SBarry Smith int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 10833eda8832SBarry Smith MatScalar *a; 108457b952d6SSatish Balay 108557b952d6SSatish Balay if (!rank) { 108655843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1087d64ed03dSBarry Smith } else { 108855843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 108957b952d6SSatish Balay } 109057b952d6SSatish Balay PLogObjectParent(mat,A); 109157b952d6SSatish Balay 109257b952d6SSatish Balay /* copy over the A part */ 109357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 109457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 109557b952d6SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 109657b952d6SSatish Balay 109757b952d6SSatish Balay for (i=0; i<mbs; i++) { 109857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 109957b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 110057b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 110157b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 110257b952d6SSatish Balay for (k=0; k<bs; k++) { 110393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1104cee3aa6bSSatish Balay col++; a += bs; 110557b952d6SSatish Balay } 110657b952d6SSatish Balay } 110757b952d6SSatish Balay } 110857b952d6SSatish Balay /* copy over the B part */ 110957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 111057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 111157b952d6SSatish Balay for (i=0; i<mbs; i++) { 111257b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 111357b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 111457b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 111557b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 111657b952d6SSatish Balay for (k=0; k<bs; k++) { 111793fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1118cee3aa6bSSatish Balay col++; a += bs; 111957b952d6SSatish Balay } 112057b952d6SSatish Balay } 112157b952d6SSatish Balay } 1122606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11236d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11246d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112555843e3eSBarry Smith /* 112655843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 112755843e3eSBarry Smith synchronized across all processors that share the Draw object 112855843e3eSBarry Smith */ 11296831982aSBarry Smith if (!rank) { 11306831982aSBarry Smith Viewer sviewer; 11316831982aSBarry Smith ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 11326831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 11336831982aSBarry Smith ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 113457b952d6SSatish Balay } 113557b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 113657b952d6SSatish Balay } 11373a40ed3dSBarry Smith PetscFunctionReturn(0); 113857b952d6SSatish Balay } 113957b952d6SSatish Balay 11405615d1e5SSatish Balay #undef __FUNC__ 1141b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1142e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 114357b952d6SSatish Balay { 114457b952d6SSatish Balay int ierr; 11456831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 114657b952d6SSatish Balay 1147d64ed03dSBarry Smith PetscFunctionBegin; 11486831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 11496831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 11506831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 11516831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 11520f5bd95cSBarry Smith if (isascii || isdraw || issocket || isbinary) { 11537b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 11545cd90555SBarry Smith } else { 11550f5bd95cSBarry Smith SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 115657b952d6SSatish Balay } 11573a40ed3dSBarry Smith PetscFunctionReturn(0); 115857b952d6SSatish Balay } 115957b952d6SSatish Balay 11605615d1e5SSatish Balay #undef __FUNC__ 1161b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1162e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 116379bdfe76SSatish Balay { 116479bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 116579bdfe76SSatish Balay int ierr; 116679bdfe76SSatish Balay 1167d64ed03dSBarry Smith PetscFunctionBegin; 116898dd23e9SBarry Smith 116998dd23e9SBarry Smith if (mat->mapping) { 117098dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 117198dd23e9SBarry Smith } 117298dd23e9SBarry Smith if (mat->bmapping) { 117398dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 117498dd23e9SBarry Smith } 117561b13de0SBarry Smith if (mat->rmap) { 117661b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 117761b13de0SBarry Smith } 117861b13de0SBarry Smith if (mat->cmap) { 117961b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 118061b13de0SBarry Smith } 1181aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1182e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 118379bdfe76SSatish Balay #endif 118479bdfe76SSatish Balay 11858798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 11868798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 11878798bf22SSatish Balay 1188606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 118979bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 119079bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1191aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 11920f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 119348e59246SSatish Balay #else 1194606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 119548e59246SSatish Balay #endif 1196606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1197606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1198606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1199606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1200606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1201606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1202*6fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 1203*6fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1204*6fa18ffdSBarry Smith #endif 1205606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 120679bdfe76SSatish Balay PLogObjectDestroy(mat); 120779bdfe76SSatish Balay PetscHeaderDestroy(mat); 12083a40ed3dSBarry Smith PetscFunctionReturn(0); 120979bdfe76SSatish Balay } 121079bdfe76SSatish Balay 12115615d1e5SSatish Balay #undef __FUNC__ 1212b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1213ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1214cee3aa6bSSatish Balay { 1215cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 121647b4a8eaSLois Curfman McInnes int ierr,nt; 1217cee3aa6bSSatish Balay 1218d64ed03dSBarry Smith PetscFunctionBegin; 1219e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 122047b4a8eaSLois Curfman McInnes if (nt != a->n) { 1221a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 122247b4a8eaSLois Curfman McInnes } 1223e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 122447b4a8eaSLois Curfman McInnes if (nt != a->m) { 1225a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 122647b4a8eaSLois Curfman McInnes } 122743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1228f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 122943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1230f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 123143a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12323a40ed3dSBarry Smith PetscFunctionReturn(0); 1233cee3aa6bSSatish Balay } 1234cee3aa6bSSatish Balay 12355615d1e5SSatish Balay #undef __FUNC__ 1236b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1237ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1238cee3aa6bSSatish Balay { 1239cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1240cee3aa6bSSatish Balay int ierr; 1241d64ed03dSBarry Smith 1242d64ed03dSBarry Smith PetscFunctionBegin; 124343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1244f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 124543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1246f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12473a40ed3dSBarry Smith PetscFunctionReturn(0); 1248cee3aa6bSSatish Balay } 1249cee3aa6bSSatish Balay 12505615d1e5SSatish Balay #undef __FUNC__ 1251b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 12527c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1253cee3aa6bSSatish Balay { 1254cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1255cee3aa6bSSatish Balay int ierr; 1256cee3aa6bSSatish Balay 1257d64ed03dSBarry Smith PetscFunctionBegin; 1258cee3aa6bSSatish Balay /* do nondiagonal part */ 12597c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1260cee3aa6bSSatish Balay /* send it on its way */ 1261537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1262cee3aa6bSSatish Balay /* do local part */ 12637c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1264cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1265cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1266cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1267639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12683a40ed3dSBarry Smith PetscFunctionReturn(0); 1269cee3aa6bSSatish Balay } 1270cee3aa6bSSatish Balay 12715615d1e5SSatish Balay #undef __FUNC__ 1272b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 12737c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1274cee3aa6bSSatish Balay { 1275cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1276cee3aa6bSSatish Balay int ierr; 1277cee3aa6bSSatish Balay 1278d64ed03dSBarry Smith PetscFunctionBegin; 1279cee3aa6bSSatish Balay /* do nondiagonal part */ 12807c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1281cee3aa6bSSatish Balay /* send it on its way */ 1282537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1283cee3aa6bSSatish Balay /* do local part */ 12847c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1285cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1286cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1287cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1288537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12893a40ed3dSBarry Smith PetscFunctionReturn(0); 1290cee3aa6bSSatish Balay } 1291cee3aa6bSSatish Balay 1292cee3aa6bSSatish Balay /* 1293cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1294cee3aa6bSSatish Balay diagonal block 1295cee3aa6bSSatish Balay */ 12965615d1e5SSatish Balay #undef __FUNC__ 1297b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1298ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1299cee3aa6bSSatish Balay { 1300cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 13013a40ed3dSBarry Smith int ierr; 1302d64ed03dSBarry Smith 1303d64ed03dSBarry Smith PetscFunctionBegin; 1304a8c6a408SBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 13053a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13063a40ed3dSBarry Smith PetscFunctionReturn(0); 1307cee3aa6bSSatish Balay } 1308cee3aa6bSSatish Balay 13095615d1e5SSatish Balay #undef __FUNC__ 1310b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1311ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1312cee3aa6bSSatish Balay { 1313cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1314cee3aa6bSSatish Balay int ierr; 1315d64ed03dSBarry Smith 1316d64ed03dSBarry Smith PetscFunctionBegin; 1317cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1318cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 13193a40ed3dSBarry Smith PetscFunctionReturn(0); 1320cee3aa6bSSatish Balay } 1321026e39d0SSatish Balay 13225615d1e5SSatish Balay #undef __FUNC__ 1323b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ" 1324ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 132557b952d6SSatish Balay { 132657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1327d64ed03dSBarry Smith 1328d64ed03dSBarry Smith PetscFunctionBegin; 1329bd7f49f5SSatish Balay if (m) *m = mat->M; 1330bd7f49f5SSatish Balay if (n) *n = mat->N; 13313a40ed3dSBarry Smith PetscFunctionReturn(0); 133257b952d6SSatish Balay } 133357b952d6SSatish Balay 13345615d1e5SSatish Balay #undef __FUNC__ 1335b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ" 1336ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 133757b952d6SSatish Balay { 133857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1339d64ed03dSBarry Smith 1340d64ed03dSBarry Smith PetscFunctionBegin; 1341f830108cSBarry Smith *m = mat->m; *n = mat->n; 13423a40ed3dSBarry Smith PetscFunctionReturn(0); 134357b952d6SSatish Balay } 134457b952d6SSatish Balay 13455615d1e5SSatish Balay #undef __FUNC__ 1346b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1347ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 134857b952d6SSatish Balay { 134957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1350d64ed03dSBarry Smith 1351d64ed03dSBarry Smith PetscFunctionBegin; 1352a21fb8cbSBarry Smith if (m) *m = mat->rstart*mat->bs; 1353a21fb8cbSBarry Smith if (n) *n = mat->rend*mat->bs; 13543a40ed3dSBarry Smith PetscFunctionReturn(0); 135557b952d6SSatish Balay } 135657b952d6SSatish Balay 13575615d1e5SSatish Balay #undef __FUNC__ 1358b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1359acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1360acdf5bf4SSatish Balay { 1361acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1362acdf5bf4SSatish Balay Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1363acdf5bf4SSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1364d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1365d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1366acdf5bf4SSatish Balay 1367d64ed03dSBarry Smith PetscFunctionBegin; 1368a8c6a408SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1369acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1370acdf5bf4SSatish Balay 1371acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1372acdf5bf4SSatish Balay /* 1373acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1374acdf5bf4SSatish Balay */ 1375acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1376bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1377bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1378acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1379acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1380acdf5bf4SSatish Balay } 1381549d3d68SSatish Balay mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1382acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1383acdf5bf4SSatish Balay } 1384acdf5bf4SSatish Balay 1385a8c6a408SBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1386d9d09a02SSatish Balay lrow = row - brstart; 1387acdf5bf4SSatish Balay 1388acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1389acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1390acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1391f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1392f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1393acdf5bf4SSatish Balay nztot = nzA + nzB; 1394acdf5bf4SSatish Balay 1395acdf5bf4SSatish Balay cmap = mat->garray; 1396acdf5bf4SSatish Balay if (v || idx) { 1397acdf5bf4SSatish Balay if (nztot) { 1398acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1399acdf5bf4SSatish Balay int imark = -1; 1400acdf5bf4SSatish Balay if (v) { 1401acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1402acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1403d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1404acdf5bf4SSatish Balay else break; 1405acdf5bf4SSatish Balay } 1406acdf5bf4SSatish Balay imark = i; 1407acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1408acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1409acdf5bf4SSatish Balay } 1410acdf5bf4SSatish Balay if (idx) { 1411acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1412acdf5bf4SSatish Balay if (imark > -1) { 1413acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1414bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1415acdf5bf4SSatish Balay } 1416acdf5bf4SSatish Balay } else { 1417acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1418d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1419d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1420acdf5bf4SSatish Balay else break; 1421acdf5bf4SSatish Balay } 1422acdf5bf4SSatish Balay imark = i; 1423acdf5bf4SSatish Balay } 1424d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1425d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1426acdf5bf4SSatish Balay } 1427d64ed03dSBarry Smith } else { 1428d212a18eSSatish Balay if (idx) *idx = 0; 1429d212a18eSSatish Balay if (v) *v = 0; 1430d212a18eSSatish Balay } 1431acdf5bf4SSatish Balay } 1432acdf5bf4SSatish Balay *nz = nztot; 1433f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1434f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14353a40ed3dSBarry Smith PetscFunctionReturn(0); 1436acdf5bf4SSatish Balay } 1437acdf5bf4SSatish Balay 14385615d1e5SSatish Balay #undef __FUNC__ 1439b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1440acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1441acdf5bf4SSatish Balay { 1442acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1443d64ed03dSBarry Smith 1444d64ed03dSBarry Smith PetscFunctionBegin; 1445acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1446a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1447acdf5bf4SSatish Balay } 1448acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14493a40ed3dSBarry Smith PetscFunctionReturn(0); 1450acdf5bf4SSatish Balay } 1451acdf5bf4SSatish Balay 14525615d1e5SSatish Balay #undef __FUNC__ 1453b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1454ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14555a838052SSatish Balay { 14565a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1457d64ed03dSBarry Smith 1458d64ed03dSBarry Smith PetscFunctionBegin; 14595a838052SSatish Balay *bs = baij->bs; 14603a40ed3dSBarry Smith PetscFunctionReturn(0); 14615a838052SSatish Balay } 14625a838052SSatish Balay 14635615d1e5SSatish Balay #undef __FUNC__ 1464b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1465ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 146658667388SSatish Balay { 146758667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 146858667388SSatish Balay int ierr; 1469d64ed03dSBarry Smith 1470d64ed03dSBarry Smith PetscFunctionBegin; 147158667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 147258667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14733a40ed3dSBarry Smith PetscFunctionReturn(0); 147458667388SSatish Balay } 14750ac07820SSatish Balay 14765615d1e5SSatish Balay #undef __FUNC__ 1477b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1478ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14790ac07820SSatish Balay { 14804e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14814e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 14827d57db60SLois Curfman McInnes int ierr; 1483329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14840ac07820SSatish Balay 1485d64ed03dSBarry Smith PetscFunctionBegin; 14864e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 14874e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14880e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1489de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14904e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14910e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1492de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14930ac07820SSatish Balay if (flag == MAT_LOCAL) { 14944e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14954e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14964e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14974e220ebcSLois Curfman McInnes info->memory = isend[3]; 14984e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14990ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1500f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 15014e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15024e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15034e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15044e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15054e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15060ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1507f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 15084e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15094e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15104e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15114e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15124e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1513d41123aaSBarry Smith } else { 1514d41123aaSBarry Smith SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 15150ac07820SSatish Balay } 15165a5d4f66SBarry Smith info->rows_global = (double)a->M; 15175a5d4f66SBarry Smith info->columns_global = (double)a->N; 15185a5d4f66SBarry Smith info->rows_local = (double)a->m; 15195a5d4f66SBarry Smith info->columns_local = (double)a->N; 15204e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15214e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15224e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15233a40ed3dSBarry Smith PetscFunctionReturn(0); 15240ac07820SSatish Balay } 15250ac07820SSatish Balay 15265615d1e5SSatish Balay #undef __FUNC__ 1527b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1528ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 152958667388SSatish Balay { 153058667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 153198305bb5SBarry Smith int ierr; 153258667388SSatish Balay 1533d64ed03dSBarry Smith PetscFunctionBegin; 153458667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 153558667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 15366da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1537c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 15384787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 15397c922b88SBarry Smith op == MAT_KEEP_ZEROED_ROWS || 15404787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 154198305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154298305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1543b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 15447c922b88SBarry Smith a->roworiented = PETSC_TRUE; 154598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1547b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 15486da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 154958667388SSatish Balay op == MAT_SYMMETRIC || 155058667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1551b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 155298305bb5SBarry Smith op == MAT_USE_HASH_TABLE) { 155358667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 155498305bb5SBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 15557c922b88SBarry Smith a->roworiented = PETSC_FALSE; 155698305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155798305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 15582b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 15597c922b88SBarry Smith a->donotstash = PETSC_TRUE; 1560d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1561d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1562133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 15637c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 1564d64ed03dSBarry Smith } else { 1565d64ed03dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1566d64ed03dSBarry Smith } 15673a40ed3dSBarry Smith PetscFunctionReturn(0); 156858667388SSatish Balay } 156958667388SSatish Balay 15705615d1e5SSatish Balay #undef __FUNC__ 1571b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1572ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15730ac07820SSatish Balay { 15740ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15750ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15760ac07820SSatish Balay Mat B; 157740011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 15780ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15793eda8832SBarry Smith MatScalar *a; 15800ac07820SSatish Balay 1581d64ed03dSBarry Smith PetscFunctionBegin; 15827c922b88SBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 15837c922b88SBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 15840ac07820SSatish Balay 15850ac07820SSatish Balay /* copy over the A part */ 15860ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15870ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15880ac07820SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 15890ac07820SSatish Balay 15900ac07820SSatish Balay for (i=0; i<mbs; i++) { 15910ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15920ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15930ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15940ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15950ac07820SSatish Balay for (k=0; k<bs; k++) { 159693fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15970ac07820SSatish Balay col++; a += bs; 15980ac07820SSatish Balay } 15990ac07820SSatish Balay } 16000ac07820SSatish Balay } 16010ac07820SSatish Balay /* copy over the B part */ 16020ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16030ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16040ac07820SSatish Balay for (i=0; i<mbs; i++) { 16050ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16060ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16070ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16080ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16090ac07820SSatish Balay for (k=0; k<bs; k++) { 161093fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16110ac07820SSatish Balay col++; a += bs; 16120ac07820SSatish Balay } 16130ac07820SSatish Balay } 16140ac07820SSatish Balay } 1615606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16160ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16170ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16180ac07820SSatish Balay 16197c922b88SBarry Smith if (matout) { 16200ac07820SSatish Balay *matout = B; 16210ac07820SSatish Balay } else { 1622f830108cSBarry Smith PetscOps *Abops; 1623cc2dc46cSBarry Smith MatOps Aops; 1624f830108cSBarry Smith 16250ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 1626606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 16270ac07820SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 16280ac07820SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1629aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 16300f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1631b1fc9764SSatish Balay #else 1632606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1633b1fc9764SSatish Balay #endif 1634606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1635606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1636606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1637606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1638f830108cSBarry Smith 1639f830108cSBarry Smith /* 1640f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1641f830108cSBarry Smith A pointers for the bops and ops but copy everything 1642f830108cSBarry Smith else from C. 1643f830108cSBarry Smith */ 1644f830108cSBarry Smith Abops = A->bops; 1645f830108cSBarry Smith Aops = A->ops; 1646549d3d68SSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1647f830108cSBarry Smith A->bops = Abops; 1648f830108cSBarry Smith A->ops = Aops; 1649f830108cSBarry Smith 16500ac07820SSatish Balay PetscHeaderDestroy(B); 16510ac07820SSatish Balay } 16523a40ed3dSBarry Smith PetscFunctionReturn(0); 16530ac07820SSatish Balay } 16540e95ebc0SSatish Balay 16555615d1e5SSatish Balay #undef __FUNC__ 1656b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 165736c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16580e95ebc0SSatish Balay { 165936c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 166036c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 16610e95ebc0SSatish Balay int ierr,s1,s2,s3; 16620e95ebc0SSatish Balay 1663d64ed03dSBarry Smith PetscFunctionBegin; 166436c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 166536c4a09eSSatish Balay if (rr) { 166636c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 166736c4a09eSSatish Balay if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 166836c4a09eSSatish Balay /* Overlap communication with computation. */ 166936c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 167036c4a09eSSatish Balay } 16710e95ebc0SSatish Balay if (ll) { 16720e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 167336c4a09eSSatish Balay if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1674a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16750e95ebc0SSatish Balay } 167636c4a09eSSatish Balay /* scale the diagonal block */ 167736c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 167836c4a09eSSatish Balay 167936c4a09eSSatish Balay if (rr) { 168036c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 168136c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1682a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 168336c4a09eSSatish Balay } 168436c4a09eSSatish Balay 16853a40ed3dSBarry Smith PetscFunctionReturn(0); 16860e95ebc0SSatish Balay } 16870e95ebc0SSatish Balay 16885615d1e5SSatish Balay #undef __FUNC__ 1689b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1690ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 16910ac07820SSatish Balay { 16920ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16930ac07820SSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1694a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 16950ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16960ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1697a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16980ac07820SSatish Balay MPI_Comm comm = A->comm; 16990ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17000ac07820SSatish Balay MPI_Status recv_status,*send_status; 17010ac07820SSatish Balay IS istmp; 17020ac07820SSatish Balay 1703d64ed03dSBarry Smith PetscFunctionBegin; 17040ac07820SSatish Balay ierr = ISGetSize(is,&N);CHKERRQ(ierr); 17050ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 17060ac07820SSatish Balay 17070ac07820SSatish Balay /* first count number of contributors to each processor */ 17080ac07820SSatish Balay nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1709549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1710549d3d68SSatish Balay procs = nprocs + size; 17110ac07820SSatish Balay owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 17120ac07820SSatish Balay for (i=0; i<N; i++) { 17130ac07820SSatish Balay idx = rows[i]; 17140ac07820SSatish Balay found = 0; 17150ac07820SSatish Balay for (j=0; j<size; j++) { 17160ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 17170ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 17180ac07820SSatish Balay } 17190ac07820SSatish Balay } 1720a8c6a408SBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 17210ac07820SSatish Balay } 17220ac07820SSatish Balay nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 17230ac07820SSatish Balay 17240ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 17256831982aSBarry Smith work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 17266831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 17270ac07820SSatish Balay nmax = work[rank]; 17286831982aSBarry Smith nrecvs = work[size+rank]; 1729606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 17300ac07820SSatish Balay 17310ac07820SSatish Balay /* post receives: */ 1732d64ed03dSBarry Smith rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1733d64ed03dSBarry Smith recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 17340ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1735ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17360ac07820SSatish Balay } 17370ac07820SSatish Balay 17380ac07820SSatish Balay /* do sends: 17390ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17400ac07820SSatish Balay the ith processor 17410ac07820SSatish Balay */ 17420ac07820SSatish Balay svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1743ca161407SBarry Smith send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 17440ac07820SSatish Balay starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 17450ac07820SSatish Balay starts[0] = 0; 17460ac07820SSatish Balay for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 17470ac07820SSatish Balay for (i=0; i<N; i++) { 17480ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17490ac07820SSatish Balay } 17506831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17510ac07820SSatish Balay 17520ac07820SSatish Balay starts[0] = 0; 17530ac07820SSatish Balay for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 17540ac07820SSatish Balay count = 0; 17550ac07820SSatish Balay for (i=0; i<size; i++) { 17560ac07820SSatish Balay if (procs[i]) { 1757ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17580ac07820SSatish Balay } 17590ac07820SSatish Balay } 1760606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17610ac07820SSatish Balay 17620ac07820SSatish Balay base = owners[rank]*bs; 17630ac07820SSatish Balay 17640ac07820SSatish Balay /* wait on receives */ 17650ac07820SSatish Balay lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 17660ac07820SSatish Balay source = lens + nrecvs; 17670ac07820SSatish Balay count = nrecvs; slen = 0; 17680ac07820SSatish Balay while (count) { 1769ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17700ac07820SSatish Balay /* unpack receives into our local space */ 1771ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17720ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17730ac07820SSatish Balay lens[imdex] = n; 17740ac07820SSatish Balay slen += n; 17750ac07820SSatish Balay count--; 17760ac07820SSatish Balay } 1777606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17780ac07820SSatish Balay 17790ac07820SSatish Balay /* move the data into the send scatter */ 17800ac07820SSatish Balay lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 17810ac07820SSatish Balay count = 0; 17820ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17830ac07820SSatish Balay values = rvalues + i*nmax; 17840ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17850ac07820SSatish Balay lrows[count++] = values[j] - base; 17860ac07820SSatish Balay } 17870ac07820SSatish Balay } 1788606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1789606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1790606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1791606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17920ac07820SSatish Balay 17930ac07820SSatish Balay /* actually zap the local rows */ 1794029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 17950ac07820SSatish Balay PLogObjectParent(A,istmp); 1796a07cd24cSSatish Balay 179772dacd9aSBarry Smith /* 179872dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 179972dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 180072dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 180172dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 180272dacd9aSBarry Smith 180372dacd9aSBarry Smith Contributed by: Mathew Knepley 180472dacd9aSBarry Smith */ 18059c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1806*6fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 18079c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 1808*6fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 18099c957beeSSatish Balay } else if (diag) { 1810*6fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1811fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1812fa46199cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1813fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18146525c446SSatish Balay } 1815a07cd24cSSatish Balay for (i = 0; i < slen; i++) { 1816a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 18173eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1818a07cd24cSSatish Balay } 1819a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18219c957beeSSatish Balay } else { 1822*6fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1823a07cd24cSSatish Balay } 18249c957beeSSatish Balay 18259c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1826606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1827a07cd24cSSatish Balay 18280ac07820SSatish Balay /* wait on sends */ 18290ac07820SSatish Balay if (nsends) { 1830d64ed03dSBarry Smith send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1831ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1832606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18330ac07820SSatish Balay } 1834606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1835606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18360ac07820SSatish Balay 18373a40ed3dSBarry Smith PetscFunctionReturn(0); 18380ac07820SSatish Balay } 183972dacd9aSBarry Smith 18405615d1e5SSatish Balay #undef __FUNC__ 1841b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1842ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1843ba4ca20aSSatish Balay { 1844ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 184525fdafccSSatish Balay MPI_Comm comm = A->comm; 1846133cdb44SSatish Balay static int called = 0; 18473a40ed3dSBarry Smith int ierr; 1848ba4ca20aSSatish Balay 1849d64ed03dSBarry Smith PetscFunctionBegin; 18503a40ed3dSBarry Smith if (!a->rank) { 18513a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 185225fdafccSSatish Balay } 185325fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1854d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1855d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18563a40ed3dSBarry Smith PetscFunctionReturn(0); 1857ba4ca20aSSatish Balay } 18580ac07820SSatish Balay 18595615d1e5SSatish Balay #undef __FUNC__ 1860b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1861ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1862bb5a7306SBarry Smith { 1863bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1864bb5a7306SBarry Smith int ierr; 1865d64ed03dSBarry Smith 1866d64ed03dSBarry Smith PetscFunctionBegin; 1867bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18683a40ed3dSBarry Smith PetscFunctionReturn(0); 1869bb5a7306SBarry Smith } 1870bb5a7306SBarry Smith 18712e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18720ac07820SSatish Balay 18737fc3c18eSBarry Smith #undef __FUNC__ 1874b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 18757fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18767fc3c18eSBarry Smith { 18777fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18787fc3c18eSBarry Smith Mat a,b,c,d; 18797fc3c18eSBarry Smith PetscTruth flg; 18807fc3c18eSBarry Smith int ierr; 18817fc3c18eSBarry Smith 18827fc3c18eSBarry Smith PetscFunctionBegin; 18837fc3c18eSBarry Smith if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 18847fc3c18eSBarry Smith a = matA->A; b = matA->B; 18857fc3c18eSBarry Smith c = matB->A; d = matB->B; 18867fc3c18eSBarry Smith 18877fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18887fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18897fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18907fc3c18eSBarry Smith } 18917fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18927fc3c18eSBarry Smith PetscFunctionReturn(0); 18937fc3c18eSBarry Smith } 18947fc3c18eSBarry Smith 189579bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1896cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1897cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1898cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1899cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1900cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1901cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 19027c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19037c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1904cc2dc46cSBarry Smith 0, 1905cc2dc46cSBarry Smith 0, 1906cc2dc46cSBarry Smith 0, 1907cc2dc46cSBarry Smith 0, 1908cc2dc46cSBarry Smith 0, 1909cc2dc46cSBarry Smith 0, 1910cc2dc46cSBarry Smith 0, 1911cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1912cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 19137fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1914cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1915cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1916cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1917cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1918cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1919cc2dc46cSBarry Smith 0, 1920cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1921cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1922cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1923cc2dc46cSBarry Smith 0, 1924cc2dc46cSBarry Smith 0, 1925cc2dc46cSBarry Smith 0, 1926cc2dc46cSBarry Smith 0, 1927cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1928cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1929cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1930cc2dc46cSBarry Smith 0, 1931cc2dc46cSBarry Smith 0, 1932cc2dc46cSBarry Smith 0, 1933cc2dc46cSBarry Smith 0, 19342e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1935cc2dc46cSBarry Smith 0, 1936cc2dc46cSBarry Smith 0, 1937cc2dc46cSBarry Smith 0, 1938cc2dc46cSBarry Smith 0, 1939cc2dc46cSBarry Smith 0, 1940cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1941cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1942cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1943cc2dc46cSBarry Smith 0, 1944cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1945cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1946cc2dc46cSBarry Smith 0, 1947cc2dc46cSBarry Smith 0, 1948cc2dc46cSBarry Smith 0, 1949cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1950cc2dc46cSBarry Smith 0, 1951cc2dc46cSBarry Smith 0, 1952cc2dc46cSBarry Smith 0, 1953cc2dc46cSBarry Smith 0, 1954cc2dc46cSBarry Smith 0, 1955cc2dc46cSBarry Smith 0, 1956cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1957cc2dc46cSBarry Smith 0, 1958cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1959cc2dc46cSBarry Smith 0, 1960cc2dc46cSBarry Smith 0, 1961cc2dc46cSBarry Smith 0, 1962cc2dc46cSBarry Smith MatGetMaps_Petsc}; 196379bdfe76SSatish Balay 19645ef9f2a5SBarry Smith 1965e18c124aSSatish Balay EXTERN_C_BEGIN 19665ef9f2a5SBarry Smith #undef __FUNC__ 1967b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 19685ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19695ef9f2a5SBarry Smith { 19705ef9f2a5SBarry Smith PetscFunctionBegin; 19715ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19725ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19735ef9f2a5SBarry Smith PetscFunctionReturn(0); 19745ef9f2a5SBarry Smith } 1975e18c124aSSatish Balay EXTERN_C_END 197679bdfe76SSatish Balay 19775615d1e5SSatish Balay #undef __FUNC__ 1978b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 197979bdfe76SSatish Balay /*@C 198079bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 198179bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 198279bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 198379bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 198479bdfe76SSatish Balay performance can be increased by more than a factor of 50. 198579bdfe76SSatish Balay 1986db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1987db81eaa0SLois Curfman McInnes 198879bdfe76SSatish Balay Input Parameters: 1989db81eaa0SLois Curfman McInnes + comm - MPI communicator 199079bdfe76SSatish Balay . bs - size of blockk 199179bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 199292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 199392e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 199492e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 199592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 199692e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1997be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1998be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 199979bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 200079bdfe76SSatish Balay submatrix (same for all local rows) 20017fc3c18eSBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 200292e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2003db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 200492e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 200579bdfe76SSatish Balay submatrix (same for all local rows). 20067fc3c18eSBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 200792e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 200892e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 200979bdfe76SSatish Balay 201079bdfe76SSatish Balay Output Parameter: 201179bdfe76SSatish Balay . A - the matrix 201279bdfe76SSatish Balay 2013db81eaa0SLois Curfman McInnes Options Database Keys: 2014db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2015db81eaa0SLois Curfman McInnes block calculations (much slower) 2016db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 2017494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 2018494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 20193ffaccefSLois Curfman McInnes 2020b259b22eSLois Curfman McInnes Notes: 202179bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 202279bdfe76SSatish Balay (possibly both). 202379bdfe76SSatish Balay 2024be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2025be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2026be79a94dSBarry Smith 202779bdfe76SSatish Balay Storage Information: 202879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 202979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 203079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 203179bdfe76SSatish Balay local matrix (a rectangular submatrix). 203279bdfe76SSatish Balay 203379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 203479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 203579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 203679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 203779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 203879bdfe76SSatish Balay 203979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 204079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 204179bdfe76SSatish Balay 2042db81eaa0SLois Curfman McInnes .vb 2043db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2044db81eaa0SLois Curfman McInnes ------------------- 2045db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2046db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2047db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2048db81eaa0SLois Curfman McInnes ------------------- 2049db81eaa0SLois Curfman McInnes .ve 205079bdfe76SSatish Balay 205179bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 205279bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 205379bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 205457b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 205579bdfe76SSatish Balay 2056d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2057d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 205879bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 205992e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 206092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 20616da5968aSLois Curfman McInnes matrices. 206279bdfe76SSatish Balay 2063027ccd11SLois Curfman McInnes Level: intermediate 2064027ccd11SLois Curfman McInnes 206592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 206679bdfe76SSatish Balay 20673eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 206879bdfe76SSatish Balay @*/ 2069329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 207079bdfe76SSatish Balay { 207179bdfe76SSatish Balay Mat B; 207279bdfe76SSatish Balay Mat_MPIBAIJ *b; 2073f1af5d2fSBarry Smith int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2074f1af5d2fSBarry Smith PetscTruth flag1,flag2,flg; 207579bdfe76SSatish Balay 2076d64ed03dSBarry Smith PetscFunctionBegin; 2077962fb4a0SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2078962fb4a0SBarry Smith 2079a8c6a408SBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 208036c4a09eSSatish Balay if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 208136c4a09eSSatish Balay if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 20824fdb0a08SBarry Smith if (d_nnz) { 208336c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 20844fdb0a08SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 20854fdb0a08SBarry Smith } 20864fdb0a08SBarry Smith } 20874fdb0a08SBarry Smith if (o_nnz) { 208836c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 20894fdb0a08SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 20904fdb0a08SBarry Smith } 20914fdb0a08SBarry Smith } 20923914022bSBarry Smith 2093d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2094494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2095494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2096494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 20973914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 20983914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 20993914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 21003a40ed3dSBarry Smith PetscFunctionReturn(0); 21013914022bSBarry Smith } 21023914022bSBarry Smith 210379bdfe76SSatish Balay *A = 0; 21043f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 210579bdfe76SSatish Balay PLogObjectCreate(B); 210679bdfe76SSatish Balay B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2107549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2108549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 21094c50302cSBarry Smith 2110e1311b90SBarry Smith B->ops->destroy = MatDestroy_MPIBAIJ; 2111e1311b90SBarry Smith B->ops->view = MatView_MPIBAIJ; 211290f02eecSBarry Smith B->mapping = 0; 211379bdfe76SSatish Balay B->factor = 0; 211479bdfe76SSatish Balay B->assembled = PETSC_FALSE; 211579bdfe76SSatish Balay 2116e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 2117d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2118d132466eSBarry Smith ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 211979bdfe76SSatish Balay 21207c922b88SBarry Smith if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2121a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2122d64ed03dSBarry Smith } 2123a8c6a408SBarry Smith if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2124a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2125a8c6a408SBarry Smith } 2126a8c6a408SBarry Smith if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2127a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2128a8c6a408SBarry Smith } 2129cee3aa6bSSatish Balay if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2130cee3aa6bSSatish Balay if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 213179bdfe76SSatish Balay 213279bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 213379bdfe76SSatish Balay work[0] = m; work[1] = n; 213479bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 2135ca161407SBarry Smith ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 213679bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 213779bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 213879bdfe76SSatish Balay } 213979bdfe76SSatish Balay if (m == PETSC_DECIDE) { 214079bdfe76SSatish Balay Mbs = M/bs; 2141a8c6a408SBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 214279bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 214379bdfe76SSatish Balay m = mbs*bs; 214479bdfe76SSatish Balay } 214579bdfe76SSatish Balay if (n == PETSC_DECIDE) { 214679bdfe76SSatish Balay Nbs = N/bs; 2147a8c6a408SBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 214879bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 214979bdfe76SSatish Balay n = nbs*bs; 215079bdfe76SSatish Balay } 2151a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 2152a8c6a408SBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2153a8c6a408SBarry Smith } 215479bdfe76SSatish Balay 215579bdfe76SSatish Balay b->m = m; B->m = m; 215679bdfe76SSatish Balay b->n = n; B->n = n; 215779bdfe76SSatish Balay b->N = N; B->N = N; 215879bdfe76SSatish Balay b->M = M; B->M = M; 215979bdfe76SSatish Balay b->bs = bs; 216079bdfe76SSatish Balay b->bs2 = bs*bs; 216179bdfe76SSatish Balay b->mbs = mbs; 216279bdfe76SSatish Balay b->nbs = nbs; 216379bdfe76SSatish Balay b->Mbs = Mbs; 216479bdfe76SSatish Balay b->Nbs = Nbs; 216579bdfe76SSatish Balay 2166c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2167c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 2168488ecbafSBarry Smith ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2169488ecbafSBarry Smith ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2170c7fcc2eaSBarry Smith 217179bdfe76SSatish Balay /* build local table of row and column ownerships */ 2172ff2fd236SBarry Smith b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2173ff2fd236SBarry Smith PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 21740ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2175ff2fd236SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2176ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 217779bdfe76SSatish Balay b->rowners[0] = 0; 217879bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 217979bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 218079bdfe76SSatish Balay } 2181ff2fd236SBarry Smith for (i=0; i<=b->size; i++) { 2182ff2fd236SBarry Smith b->rowners_bs[i] = b->rowners[i]*bs; 2183ff2fd236SBarry Smith } 218479bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 218579bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 21864fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 21874fa0d573SSatish Balay b->rend_bs = b->rend * bs; 21884fa0d573SSatish Balay 2189ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 219079bdfe76SSatish Balay b->cowners[0] = 0; 219179bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 219279bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 219379bdfe76SSatish Balay } 219479bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 219579bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 21964fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 21974fa0d573SSatish Balay b->cend_bs = b->cend * bs; 219879bdfe76SSatish Balay 2199a07cd24cSSatish Balay 220079bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2201029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 220279bdfe76SSatish Balay PLogObjectParent(B,b->A); 220379bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2204029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 220579bdfe76SSatish Balay PLogObjectParent(B,b->B); 220679bdfe76SSatish Balay 220779bdfe76SSatish Balay /* build cache for off array entries formed */ 22088798bf22SSatish Balay ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 22098798bf22SSatish Balay ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 22107c922b88SBarry Smith b->donotstash = PETSC_FALSE; 22117c922b88SBarry Smith b->colmap = PETSC_NULL; 22127c922b88SBarry Smith b->garray = PETSC_NULL; 22137c922b88SBarry Smith b->roworiented = PETSC_TRUE; 221479bdfe76SSatish Balay 2215*6fa18ffdSBarry Smith #if defined(PEYSC_USE_MAT_SINGLE) 2216*6fa18ffdSBarry Smith /* stuff for MatSetValues_XXX in single precision */ 2217*6fa18ffdSBarry Smith b->lensetvalues = 0; 2218*6fa18ffdSBarry Smith b->setvaluescopy = PETSC_NULL; 2219*6fa18ffdSBarry Smith #endif 2220*6fa18ffdSBarry Smith 222130793edcSSatish Balay /* stuff used in block assembly */ 222230793edcSSatish Balay b->barray = 0; 222330793edcSSatish Balay 222479bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 222579bdfe76SSatish Balay b->lvec = 0; 222679bdfe76SSatish Balay b->Mvctx = 0; 222779bdfe76SSatish Balay 222879bdfe76SSatish Balay /* stuff for MatGetRow() */ 222979bdfe76SSatish Balay b->rowindices = 0; 223079bdfe76SSatish Balay b->rowvalues = 0; 223179bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 223279bdfe76SSatish Balay 2233a07cd24cSSatish Balay /* hash table stuff */ 2234a07cd24cSSatish Balay b->ht = 0; 2235187ce0cbSSatish Balay b->hd = 0; 22360bdbc534SSatish Balay b->ht_size = 0; 22377c922b88SBarry Smith b->ht_flag = PETSC_FALSE; 223825fdafccSSatish Balay b->ht_fact = 0; 2239187ce0cbSSatish Balay b->ht_total_ct = 0; 2240187ce0cbSSatish Balay b->ht_insert_ct = 0; 2241a07cd24cSSatish Balay 224279bdfe76SSatish Balay *A = B; 2243133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2244133cdb44SSatish Balay if (flg) { 2245133cdb44SSatish Balay double fact = 1.39; 2246133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2247f1af5d2fSBarry Smith ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2248133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2249133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2250133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2251133cdb44SSatish Balay } 2252f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 22537fc3c18eSBarry Smith "MatStoreValues_MPIBAIJ", 22547fc3c18eSBarry Smith (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2255f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 22567fc3c18eSBarry Smith "MatRetrieveValues_MPIBAIJ", 22577fc3c18eSBarry Smith (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2258f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 22595ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 22605ef9f2a5SBarry Smith (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 22613a40ed3dSBarry Smith PetscFunctionReturn(0); 226279bdfe76SSatish Balay } 2263026e39d0SSatish Balay 22645615d1e5SSatish Balay #undef __FUNC__ 2265b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 22662e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 22670ac07820SSatish Balay { 22680ac07820SSatish Balay Mat mat; 22690ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2270f1af5d2fSBarry Smith int ierr,len=0; 2271f1af5d2fSBarry Smith PetscTruth flg; 22720ac07820SSatish Balay 2273d64ed03dSBarry Smith PetscFunctionBegin; 22740ac07820SSatish Balay *newmat = 0; 22753f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 22760ac07820SSatish Balay PLogObjectCreate(mat); 22770ac07820SSatish Balay mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2278549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2279e1311b90SBarry Smith mat->ops->destroy = MatDestroy_MPIBAIJ; 2280e1311b90SBarry Smith mat->ops->view = MatView_MPIBAIJ; 22810ac07820SSatish Balay mat->factor = matin->factor; 22820ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2283aef5e8e0SSatish Balay mat->insertmode = NOT_SET_VALUES; 22840ac07820SSatish Balay 22850ac07820SSatish Balay a->m = mat->m = oldmat->m; 22860ac07820SSatish Balay a->n = mat->n = oldmat->n; 22870ac07820SSatish Balay a->M = mat->M = oldmat->M; 22880ac07820SSatish Balay a->N = mat->N = oldmat->N; 22890ac07820SSatish Balay 22900ac07820SSatish Balay a->bs = oldmat->bs; 22910ac07820SSatish Balay a->bs2 = oldmat->bs2; 22920ac07820SSatish Balay a->mbs = oldmat->mbs; 22930ac07820SSatish Balay a->nbs = oldmat->nbs; 22940ac07820SSatish Balay a->Mbs = oldmat->Mbs; 22950ac07820SSatish Balay a->Nbs = oldmat->Nbs; 22960ac07820SSatish Balay 22970ac07820SSatish Balay a->rstart = oldmat->rstart; 22980ac07820SSatish Balay a->rend = oldmat->rend; 22990ac07820SSatish Balay a->cstart = oldmat->cstart; 23000ac07820SSatish Balay a->cend = oldmat->cend; 23010ac07820SSatish Balay a->size = oldmat->size; 23020ac07820SSatish Balay a->rank = oldmat->rank; 2303aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2304aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2305aef5e8e0SSatish Balay a->rowindices = 0; 23060ac07820SSatish Balay a->rowvalues = 0; 23070ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 230830793edcSSatish Balay a->barray = 0; 23093123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 23103123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 23113123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 23123123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 23130ac07820SSatish Balay 2314133cdb44SSatish Balay /* hash table stuff */ 2315133cdb44SSatish Balay a->ht = 0; 2316133cdb44SSatish Balay a->hd = 0; 2317133cdb44SSatish Balay a->ht_size = 0; 2318133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 231925fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2320133cdb44SSatish Balay a->ht_total_ct = 0; 2321133cdb44SSatish Balay a->ht_insert_ct = 0; 2322133cdb44SSatish Balay 2323133cdb44SSatish Balay 2324ff2fd236SBarry Smith a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2325ff2fd236SBarry Smith PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 23260ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 2327ff2fd236SBarry Smith a->rowners_bs = a->cowners + a->size + 2; 2328549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 23298798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 23308798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 23310ac07820SSatish Balay if (oldmat->colmap) { 2332aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 23330f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 233448e59246SSatish Balay #else 23350ac07820SSatish Balay a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 23360ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2337549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 233848e59246SSatish Balay #endif 23390ac07820SSatish Balay } else a->colmap = 0; 23400ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 23410ac07820SSatish Balay a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 23420ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 2343549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 23440ac07820SSatish Balay } else a->garray = 0; 23450ac07820SSatish Balay 23460ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 23470ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 23480ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2349e18c124aSSatish Balay 23500ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 23512e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 23520ac07820SSatish Balay PLogObjectParent(mat,a->A); 23532e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 23540ac07820SSatish Balay PLogObjectParent(mat,a->B); 23550ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2356e18c124aSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 23570ac07820SSatish Balay if (flg) { 23580ac07820SSatish Balay ierr = MatPrintHelp(mat);CHKERRQ(ierr); 23590ac07820SSatish Balay } 23600ac07820SSatish Balay *newmat = mat; 23613a40ed3dSBarry Smith PetscFunctionReturn(0); 23620ac07820SSatish Balay } 236357b952d6SSatish Balay 236457b952d6SSatish Balay #include "sys.h" 236557b952d6SSatish Balay 23665615d1e5SSatish Balay #undef __FUNC__ 2367b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 236857b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 236957b952d6SSatish Balay { 237057b952d6SSatish Balay Mat A; 237157b952d6SSatish Balay int i,nz,ierr,j,rstart,rend,fd; 237257b952d6SSatish Balay Scalar *vals,*buf; 237357b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 237457b952d6SSatish Balay MPI_Status status; 2375cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 237657b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2377f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 237857b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 237957b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 238057b952d6SSatish Balay 2381d64ed03dSBarry Smith PetscFunctionBegin; 2382f1af5d2fSBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 238357b952d6SSatish Balay 2384d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2385d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 238657b952d6SSatish Balay if (!rank) { 238757b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2388e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2389a8c6a408SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2390d64ed03dSBarry Smith if (header[3] < 0) { 2391a8c6a408SBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2392d64ed03dSBarry Smith } 23936c5fab8fSBarry Smith } 2394d64ed03dSBarry Smith 2395ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 239657b952d6SSatish Balay M = header[1]; N = header[2]; 239757b952d6SSatish Balay 2398a8c6a408SBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 239957b952d6SSatish Balay 240057b952d6SSatish Balay /* 240157b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 240257b952d6SSatish Balay divisible by the blocksize 240357b952d6SSatish Balay */ 240457b952d6SSatish Balay Mbs = M/bs; 240557b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 240657b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 240757b952d6SSatish Balay else Mbs++; 240857b952d6SSatish Balay if (extra_rows &&!rank) { 2409b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 241057b952d6SSatish Balay } 2411537820f0SBarry Smith 241257b952d6SSatish Balay /* determine ownership of all rows */ 241357b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 241457b952d6SSatish Balay m = mbs * bs; 2415cee3aa6bSSatish Balay rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2416cee3aa6bSSatish Balay browners = rowners + size + 1; 2417ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 241857b952d6SSatish Balay rowners[0] = 0; 2419cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2420cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 242157b952d6SSatish Balay rstart = rowners[rank]; 242257b952d6SSatish Balay rend = rowners[rank+1]; 242357b952d6SSatish Balay 242457b952d6SSatish Balay /* distribute row lengths to all processors */ 242557b952d6SSatish Balay locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 242657b952d6SSatish Balay if (!rank) { 242757b952d6SSatish Balay rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2428e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 242957b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 243057b952d6SSatish Balay sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2431cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2432ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2433606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2434d64ed03dSBarry Smith } else { 2435ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 243657b952d6SSatish Balay } 243757b952d6SSatish Balay 243857b952d6SSatish Balay if (!rank) { 243957b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 244057b952d6SSatish Balay procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2441549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 244257b952d6SSatish Balay for (i=0; i<size; i++) { 244357b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 244457b952d6SSatish Balay procsnz[i] += rowlengths[j]; 244557b952d6SSatish Balay } 244657b952d6SSatish Balay } 2447606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 244857b952d6SSatish Balay 244957b952d6SSatish Balay /* determine max buffer needed and allocate it */ 245057b952d6SSatish Balay maxnz = 0; 245157b952d6SSatish Balay for (i=0; i<size; i++) { 245257b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 245357b952d6SSatish Balay } 245457b952d6SSatish Balay cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 245557b952d6SSatish Balay 245657b952d6SSatish Balay /* read in my part of the matrix column indices */ 245757b952d6SSatish Balay nz = procsnz[0]; 245857b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 245957b952d6SSatish Balay mycols = ibuf; 2460cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2461e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2462cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2463cee3aa6bSSatish Balay 246457b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 246557b952d6SSatish Balay for (i=1; i<size-1; i++) { 246657b952d6SSatish Balay nz = procsnz[i]; 2467e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2468ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 246957b952d6SSatish Balay } 247057b952d6SSatish Balay /* read in the stuff for the last proc */ 247157b952d6SSatish Balay if (size != 1) { 247257b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2473e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 247457b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2475ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 247657b952d6SSatish Balay } 2477606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2478d64ed03dSBarry Smith } else { 247957b952d6SSatish Balay /* determine buffer space needed for message */ 248057b952d6SSatish Balay nz = 0; 248157b952d6SSatish Balay for (i=0; i<m; i++) { 248257b952d6SSatish Balay nz += locrowlens[i]; 248357b952d6SSatish Balay } 248457b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 248557b952d6SSatish Balay mycols = ibuf; 248657b952d6SSatish Balay /* receive message of column indices*/ 2487ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2488ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2489a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 249057b952d6SSatish Balay } 249157b952d6SSatish Balay 249257b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2493cee3aa6bSSatish Balay dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2494cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 249557b952d6SSatish Balay mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2496549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 249757b952d6SSatish Balay masked1 = mask + Mbs; 249857b952d6SSatish Balay masked2 = masked1 + Mbs; 249957b952d6SSatish Balay rowcount = 0; nzcount = 0; 250057b952d6SSatish Balay for (i=0; i<mbs; i++) { 250157b952d6SSatish Balay dcount = 0; 250257b952d6SSatish Balay odcount = 0; 250357b952d6SSatish Balay for (j=0; j<bs; j++) { 250457b952d6SSatish Balay kmax = locrowlens[rowcount]; 250557b952d6SSatish Balay for (k=0; k<kmax; k++) { 250657b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 250757b952d6SSatish Balay if (!mask[tmp]) { 250857b952d6SSatish Balay mask[tmp] = 1; 250957b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 251057b952d6SSatish Balay else masked1[dcount++] = tmp; 251157b952d6SSatish Balay } 251257b952d6SSatish Balay } 251357b952d6SSatish Balay rowcount++; 251457b952d6SSatish Balay } 2515cee3aa6bSSatish Balay 251657b952d6SSatish Balay dlens[i] = dcount; 251757b952d6SSatish Balay odlens[i] = odcount; 2518cee3aa6bSSatish Balay 251957b952d6SSatish Balay /* zero out the mask elements we set */ 252057b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 252157b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 252257b952d6SSatish Balay } 2523cee3aa6bSSatish Balay 252457b952d6SSatish Balay /* create our matrix */ 2525549d3d68SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 252657b952d6SSatish Balay A = *newmat; 25276d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 252857b952d6SSatish Balay 252957b952d6SSatish Balay if (!rank) { 253057b952d6SSatish Balay buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 253157b952d6SSatish Balay /* read in my part of the matrix numerical values */ 253257b952d6SSatish Balay nz = procsnz[0]; 253357b952d6SSatish Balay vals = buf; 2534cee3aa6bSSatish Balay mycols = ibuf; 2535cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2536e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2537cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2538537820f0SBarry Smith 253957b952d6SSatish Balay /* insert into matrix */ 254057b952d6SSatish Balay jj = rstart*bs; 254157b952d6SSatish Balay for (i=0; i<m; i++) { 25423eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 254357b952d6SSatish Balay mycols += locrowlens[i]; 254457b952d6SSatish Balay vals += locrowlens[i]; 254557b952d6SSatish Balay jj++; 254657b952d6SSatish Balay } 254757b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 254857b952d6SSatish Balay for (i=1; i<size-1; i++) { 254957b952d6SSatish Balay nz = procsnz[i]; 255057b952d6SSatish Balay vals = buf; 2551e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2552ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 255357b952d6SSatish Balay } 255457b952d6SSatish Balay /* the last proc */ 255557b952d6SSatish Balay if (size != 1){ 255657b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2557cee3aa6bSSatish Balay vals = buf; 2558e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 255957b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2560ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 256157b952d6SSatish Balay } 2562606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2563d64ed03dSBarry Smith } else { 256457b952d6SSatish Balay /* receive numeric values */ 256557b952d6SSatish Balay buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 256657b952d6SSatish Balay 256757b952d6SSatish Balay /* receive message of values*/ 256857b952d6SSatish Balay vals = buf; 2569cee3aa6bSSatish Balay mycols = ibuf; 2570ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2571ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2572a8c6a408SBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 257357b952d6SSatish Balay 257457b952d6SSatish Balay /* insert into matrix */ 257557b952d6SSatish Balay jj = rstart*bs; 2576cee3aa6bSSatish Balay for (i=0; i<m; i++) { 25773eda8832SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 257857b952d6SSatish Balay mycols += locrowlens[i]; 257957b952d6SSatish Balay vals += locrowlens[i]; 258057b952d6SSatish Balay jj++; 258157b952d6SSatish Balay } 258257b952d6SSatish Balay } 2583606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2584606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2585606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2586606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2587606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2588606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 25896d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25906d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25913a40ed3dSBarry Smith PetscFunctionReturn(0); 259257b952d6SSatish Balay } 259357b952d6SSatish Balay 2594133cdb44SSatish Balay #undef __FUNC__ 2595b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2596133cdb44SSatish Balay /*@ 2597133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2598133cdb44SSatish Balay 2599133cdb44SSatish Balay Input Parameters: 2600133cdb44SSatish Balay . mat - the matrix 2601133cdb44SSatish Balay . fact - factor 2602133cdb44SSatish Balay 2603fee21e36SBarry Smith Collective on Mat 2604fee21e36SBarry Smith 26058c890885SBarry Smith Level: advanced 26068c890885SBarry Smith 2607133cdb44SSatish Balay Notes: 2608133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2609133cdb44SSatish Balay 2610133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2611133cdb44SSatish Balay 2612133cdb44SSatish Balay .seealso: MatSetOption() 2613133cdb44SSatish Balay @*/ 2614329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2615133cdb44SSatish Balay { 261625fdafccSSatish Balay Mat_MPIBAIJ *baij; 2617133cdb44SSatish Balay 2618133cdb44SSatish Balay PetscFunctionBegin; 2619133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 262025fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2621133cdb44SSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2622133cdb44SSatish Balay } 2623133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2624133cdb44SSatish Balay baij->ht_fact = fact; 2625133cdb44SSatish Balay PetscFunctionReturn(0); 2626133cdb44SSatish Balay } 2627