1*29bbc08cSBarry Smith /*$Id: mpibaij.c,v 1.202 2000/09/13 03:10:54 bsmith Exp bsmith $*/ 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h" 579bdfe76SSatish Balay 6ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat); 8ca44d042SBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9ca44d042SBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10ca44d042SBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11ca44d042SBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13ca44d042SBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14ca44d042SBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16ca44d042SBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*); 1793fea6afSBarry Smith 1893fea6afSBarry Smith /* UGLY, ugly, ugly 1993fea6afSBarry Smith When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 2093fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2193fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2293fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2393fea6afSBarry Smith into the single precision data structures. 2493fea6afSBarry Smith */ 2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 26ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 3193fea6afSBarry Smith #else 326fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 356fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 366fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3793fea6afSBarry Smith #endif 383b2fbd54SBarry Smith 397fc3c18eSBarry Smith EXTERN_C_BEGIN 407fc3c18eSBarry Smith #undef __FUNC__ 416fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 427fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat) 437fc3c18eSBarry Smith { 447fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 457fc3c18eSBarry Smith int ierr; 467fc3c18eSBarry Smith 477fc3c18eSBarry Smith PetscFunctionBegin; 487fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 497fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 507fc3c18eSBarry Smith PetscFunctionReturn(0); 517fc3c18eSBarry Smith } 527fc3c18eSBarry Smith EXTERN_C_END 537fc3c18eSBarry Smith 547fc3c18eSBarry Smith EXTERN_C_BEGIN 557fc3c18eSBarry Smith #undef __FUNC__ 566fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 577fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat) 587fc3c18eSBarry Smith { 597fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 607fc3c18eSBarry Smith int ierr; 617fc3c18eSBarry Smith 627fc3c18eSBarry Smith PetscFunctionBegin; 637fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 647fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 657fc3c18eSBarry Smith PetscFunctionReturn(0); 667fc3c18eSBarry Smith } 677fc3c18eSBarry Smith EXTERN_C_END 687fc3c18eSBarry Smith 69537820f0SBarry Smith /* 70537820f0SBarry Smith Local utility routine that creates a mapping from the global column 7157b952d6SSatish Balay number to the local number in the off-diagonal part of the local 7257b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 7357b952d6SSatish Balay length of colmap equals the global matrix length. 7457b952d6SSatish Balay */ 755615d1e5SSatish Balay #undef __FUNC__ 766fa18ffdSBarry Smith #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private" 7757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 7857b952d6SSatish Balay { 7957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 8057b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 81dc2900e9SSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 8257b952d6SSatish Balay 83d64ed03dSBarry Smith PetscFunctionBegin; 84aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 85f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 8648e59246SSatish Balay for (i=0; i<nbs; i++){ 870f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 8848e59246SSatish Balay } 8948e59246SSatish Balay #else 90758f045eSSatish Balay baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 9157b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 92549d3d68SSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 93928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 9448e59246SSatish Balay #endif 953a40ed3dSBarry Smith PetscFunctionReturn(0); 9657b952d6SSatish Balay } 9757b952d6SSatish Balay 9880c1aa95SSatish Balay #define CHUNKSIZE 10 9980c1aa95SSatish Balay 100f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 10180c1aa95SSatish Balay { \ 10280c1aa95SSatish Balay \ 10380c1aa95SSatish Balay brow = row/bs; \ 10480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 10680c1aa95SSatish Balay bcol = col/bs; \ 10780c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 108ab26458aSBarry Smith low = 0; high = nrow; \ 109ab26458aSBarry Smith while (high-low > 3) { \ 110ab26458aSBarry Smith t = (low+high)/2; \ 111ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 112ab26458aSBarry Smith else low = t; \ 113ab26458aSBarry Smith } \ 114ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11580c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 11680c1aa95SSatish Balay if (rp[_i] == bcol) { \ 11780c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 118eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 119eada6651SSatish Balay else *bap = value; \ 120ac7a638eSSatish Balay goto a_noinsert; \ 12180c1aa95SSatish Balay } \ 12280c1aa95SSatish Balay } \ 12389280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 124*29bbc08cSBarry Smith else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 12580c1aa95SSatish Balay if (nrow >= rmax) { \ 12680c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 12780c1aa95SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1283eda8832SBarry Smith MatScalar *new_a; \ 12980c1aa95SSatish Balay \ 130*29bbc08cSBarry Smith if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 13189280ab3SLois Curfman McInnes \ 13280c1aa95SSatish Balay /* malloc new storage space */ \ 1333eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 1343eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 13580c1aa95SSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 13680c1aa95SSatish Balay new_i = new_j + new_nz; \ 13780c1aa95SSatish Balay \ 13880c1aa95SSatish Balay /* copy over old data into new slots */ \ 13980c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 14080c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 141549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 14280c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 1433eda8832SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 1443eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 145549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 146549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1473eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 14880c1aa95SSatish Balay /* free up old matrix storage */ \ 149606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 150606d414cSSatish Balay if (!a->singlemalloc) { \ 151606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 152606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 153606d414cSSatish Balay } \ 15480c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1557c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 15680c1aa95SSatish Balay \ 15780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 158ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 1593eda8832SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 16080c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 16180c1aa95SSatish Balay a->reallocs++; \ 16280c1aa95SSatish Balay a->nz++; \ 16380c1aa95SSatish Balay } \ 16480c1aa95SSatish Balay N = nrow++ - 1; \ 16580c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 16680c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 16780c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1683eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16980c1aa95SSatish Balay } \ 1703eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 17180c1aa95SSatish Balay rp[_i] = bcol; \ 17280c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 173ac7a638eSSatish Balay a_noinsert:; \ 17480c1aa95SSatish Balay ailen[brow] = nrow; \ 17580c1aa95SSatish Balay } 17657b952d6SSatish Balay 177ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 178ac7a638eSSatish Balay { \ 179ac7a638eSSatish Balay brow = row/bs; \ 180ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 181ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 182ac7a638eSSatish Balay bcol = col/bs; \ 183ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 184ac7a638eSSatish Balay low = 0; high = nrow; \ 185ac7a638eSSatish Balay while (high-low > 3) { \ 186ac7a638eSSatish Balay t = (low+high)/2; \ 187ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 188ac7a638eSSatish Balay else low = t; \ 189ac7a638eSSatish Balay } \ 190ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 191ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 192ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 193ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 194ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 195ac7a638eSSatish Balay else *bap = value; \ 196ac7a638eSSatish Balay goto b_noinsert; \ 197ac7a638eSSatish Balay } \ 198ac7a638eSSatish Balay } \ 19989280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 200*29bbc08cSBarry Smith else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 201ac7a638eSSatish Balay if (nrow >= rmax) { \ 202ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 20374c639caSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2043eda8832SBarry Smith MatScalar *new_a; \ 205ac7a638eSSatish Balay \ 206*29bbc08cSBarry Smith if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 20789280ab3SLois Curfman McInnes \ 208ac7a638eSSatish Balay /* malloc new storage space */ \ 2093eda8832SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 2103eda8832SBarry Smith new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 211ac7a638eSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 212ac7a638eSSatish Balay new_i = new_j + new_nz; \ 213ac7a638eSSatish Balay \ 214ac7a638eSSatish Balay /* copy over old data into new slots */ \ 215ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 21674c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 217549d3d68SSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 218ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 2193eda8832SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 2203eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2213eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 222549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2233eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 224ac7a638eSSatish Balay /* free up old matrix storage */ \ 225606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 226606d414cSSatish Balay if (!b->singlemalloc) { \ 227606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 228606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 229606d414cSSatish Balay } \ 23074c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2317c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 232ac7a638eSSatish Balay \ 233ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 234ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 2353eda8832SBarry Smith PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 23674c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 23774c639caSSatish Balay b->reallocs++; \ 23874c639caSSatish Balay b->nz++; \ 239ac7a638eSSatish Balay } \ 240ac7a638eSSatish Balay N = nrow++ - 1; \ 241ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 242ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 243ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2443eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 245ac7a638eSSatish Balay } \ 2463eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 247ac7a638eSSatish Balay rp[_i] = bcol; \ 248ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 249ac7a638eSSatish Balay b_noinsert:; \ 250ac7a638eSSatish Balay bilen[brow] = nrow; \ 251ac7a638eSSatish Balay } 252ac7a638eSSatish Balay 25393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2545615d1e5SSatish Balay #undef __FUNC__ 2556fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 256ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 25757b952d6SSatish Balay { 2586fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 25993fea6afSBarry Smith int ierr,i,N = m*n; 2606fa18ffdSBarry Smith MatScalar *vsingle; 26193fea6afSBarry Smith 26293fea6afSBarry Smith PetscFunctionBegin; 2636fa18ffdSBarry Smith if (N > b->setvalueslen) { 2646fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 2656fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 2666fa18ffdSBarry Smith b->setvalueslen = N; 2676fa18ffdSBarry Smith } 2686fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2696fa18ffdSBarry Smith 27093fea6afSBarry Smith for (i=0; i<N; i++) { 27193fea6afSBarry Smith vsingle[i] = v[i]; 27293fea6afSBarry Smith } 27393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 27493fea6afSBarry Smith PetscFunctionReturn(0); 27593fea6afSBarry Smith } 27693fea6afSBarry Smith 27793fea6afSBarry Smith #undef __FUNC__ 2786fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 27993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 28093fea6afSBarry Smith { 2816fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 2826fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 2836fa18ffdSBarry Smith MatScalar *vsingle; 28493fea6afSBarry Smith 28593fea6afSBarry Smith PetscFunctionBegin; 2866fa18ffdSBarry Smith if (N > b->setvalueslen) { 2876fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 2886fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 2896fa18ffdSBarry Smith b->setvalueslen = N; 2906fa18ffdSBarry Smith } 2916fa18ffdSBarry Smith vsingle = b->setvaluescopy; 29293fea6afSBarry Smith for (i=0; i<N; i++) { 29393fea6afSBarry Smith vsingle[i] = v[i]; 29493fea6afSBarry Smith } 29593fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 29693fea6afSBarry Smith PetscFunctionReturn(0); 29793fea6afSBarry Smith } 2986fa18ffdSBarry Smith 2996fa18ffdSBarry Smith #undef __FUNC__ 3006fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT" 3016fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 3026fa18ffdSBarry Smith { 3036fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3046fa18ffdSBarry Smith int ierr,i,N = m*n; 3056fa18ffdSBarry Smith MatScalar *vsingle; 3066fa18ffdSBarry Smith 3076fa18ffdSBarry Smith PetscFunctionBegin; 3086fa18ffdSBarry Smith if (N > b->setvalueslen) { 3096fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 3106fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 3116fa18ffdSBarry Smith b->setvalueslen = N; 3126fa18ffdSBarry Smith } 3136fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3146fa18ffdSBarry Smith for (i=0; i<N; i++) { 3156fa18ffdSBarry Smith vsingle[i] = v[i]; 3166fa18ffdSBarry Smith } 3176fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3186fa18ffdSBarry Smith PetscFunctionReturn(0); 3196fa18ffdSBarry Smith } 3206fa18ffdSBarry Smith 3216fa18ffdSBarry Smith #undef __FUNC__ 3226fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 3236fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 3246fa18ffdSBarry Smith { 3256fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 3266fa18ffdSBarry Smith int ierr,i,N = m*n*b->bs2; 3276fa18ffdSBarry Smith MatScalar *vsingle; 3286fa18ffdSBarry Smith 3296fa18ffdSBarry Smith PetscFunctionBegin; 3306fa18ffdSBarry Smith if (N > b->setvalueslen) { 3316fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 3326fa18ffdSBarry Smith b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 3336fa18ffdSBarry Smith b->setvalueslen = N; 3346fa18ffdSBarry Smith } 3356fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3366fa18ffdSBarry Smith for (i=0; i<N; i++) { 3376fa18ffdSBarry Smith vsingle[i] = v[i]; 3386fa18ffdSBarry Smith } 3396fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3406fa18ffdSBarry Smith PetscFunctionReturn(0); 3416fa18ffdSBarry Smith } 34293fea6afSBarry Smith #endif 34393fea6afSBarry Smith 34493fea6afSBarry Smith #undef __FUNC__ 3456fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 34693fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 34793fea6afSBarry Smith { 34857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 34993fea6afSBarry Smith MatScalar value; 3504fa0d573SSatish Balay int ierr,i,j,row,col; 3514fa0d573SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 3524fa0d573SSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 3534fa0d573SSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 35457b952d6SSatish Balay 355eada6651SSatish Balay /* Some Variables required in the macro */ 35680c1aa95SSatish Balay Mat A = baij->A; 35780c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 358ac7a638eSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3593eda8832SBarry Smith MatScalar *aa=a->a; 360ac7a638eSSatish Balay 361ac7a638eSSatish Balay Mat B = baij->B; 362ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 363ac7a638eSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3643eda8832SBarry Smith MatScalar *ba=b->a; 365ac7a638eSSatish Balay 366ac7a638eSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 367ab26458aSBarry Smith int low,high,t,ridx,cidx,bs2=a->bs2; 3683eda8832SBarry Smith MatScalar *ap,*bap; 36980c1aa95SSatish Balay 370d64ed03dSBarry Smith PetscFunctionBegin; 37157b952d6SSatish Balay for (i=0; i<m; i++) { 3725ef9f2a5SBarry Smith if (im[i] < 0) continue; 373aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 374*29bbc08cSBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 375639f9d9dSBarry Smith #endif 37657b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 37757b952d6SSatish Balay row = im[i] - rstart_orig; 37857b952d6SSatish Balay for (j=0; j<n; j++) { 37957b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 38057b952d6SSatish Balay col = in[j] - cstart_orig; 38157b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 382f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 38380c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 38473959e64SBarry Smith } else if (in[j] < 0) continue; 385aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 386*29bbc08cSBarry Smith else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 387639f9d9dSBarry Smith #endif 38857b952d6SSatish Balay else { 38957b952d6SSatish Balay if (mat->was_assembled) { 390905e6a2fSBarry Smith if (!baij->colmap) { 391905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 392905e6a2fSBarry Smith } 393aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3940f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 395fa46199cSSatish Balay col = col - 1 + in[j]%bs; 39648e59246SSatish Balay #else 397905e6a2fSBarry Smith col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 39848e59246SSatish Balay #endif 39957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 40057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 40157b952d6SSatish Balay col = in[j]; 4029bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 4039bf004c3SSatish Balay B = baij->B; 4049bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 4059bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 4069bf004c3SSatish Balay ba=b->a; 40757b952d6SSatish Balay } 408d64ed03dSBarry Smith } else col = in[j]; 40957b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410ac7a638eSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 411ac7a638eSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 41257b952d6SSatish Balay } 41357b952d6SSatish Balay } 414d64ed03dSBarry Smith } else { 41590f02eecSBarry Smith if (!baij->donotstash) { 416ff2fd236SBarry Smith if (roworiented) { 4176fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 418ff2fd236SBarry Smith } else { 4196fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 42057b952d6SSatish Balay } 42157b952d6SSatish Balay } 42257b952d6SSatish Balay } 42390f02eecSBarry Smith } 4243a40ed3dSBarry Smith PetscFunctionReturn(0); 42557b952d6SSatish Balay } 42657b952d6SSatish Balay 427ab26458aSBarry Smith #undef __FUNC__ 4286fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 42993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 430ab26458aSBarry Smith { 431ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 43293fea6afSBarry Smith MatScalar *value,*barray=baij->barray; 4337ef1d9bdSSatish Balay int ierr,i,j,ii,jj,row,col; 434ab26458aSBarry Smith int roworiented = baij->roworiented,rstart=baij->rstart ; 435ab26458aSBarry Smith int rend=baij->rend,cstart=baij->cstart,stepval; 436ab26458aSBarry Smith int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 437ab26458aSBarry Smith 438b16ae2b1SBarry Smith PetscFunctionBegin; 43930793edcSSatish Balay if(!barray) { 44093fea6afSBarry Smith baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 44130793edcSSatish Balay } 44230793edcSSatish Balay 443ab26458aSBarry Smith if (roworiented) { 444ab26458aSBarry Smith stepval = (n-1)*bs; 445ab26458aSBarry Smith } else { 446ab26458aSBarry Smith stepval = (m-1)*bs; 447ab26458aSBarry Smith } 448ab26458aSBarry Smith for (i=0; i<m; i++) { 4495ef9f2a5SBarry Smith if (im[i] < 0) continue; 450aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 451*29bbc08cSBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 452ab26458aSBarry Smith #endif 453ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 454ab26458aSBarry Smith row = im[i] - rstart; 455ab26458aSBarry Smith for (j=0; j<n; j++) { 45615b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 45715b57d14SSatish Balay if ((roworiented) && (n == 1)) { 45815b57d14SSatish Balay barray = v + i*bs2; 45915b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 46015b57d14SSatish Balay barray = v + j*bs2; 46115b57d14SSatish Balay } else { /* Here a copy is required */ 462ab26458aSBarry Smith if (roworiented) { 463ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 464ab26458aSBarry Smith } else { 465ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 466abef11f7SSatish Balay } 46747513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 46847513183SBarry Smith for (jj=0; jj<bs; jj++) { 46930793edcSSatish Balay *barray++ = *value++; 47047513183SBarry Smith } 47147513183SBarry Smith } 47230793edcSSatish Balay barray -=bs2; 47315b57d14SSatish Balay } 474abef11f7SSatish Balay 475abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 476abef11f7SSatish Balay col = in[j] - cstart; 47793fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 478ab26458aSBarry Smith } 4795ef9f2a5SBarry Smith else if (in[j] < 0) continue; 480aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 481*29bbc08cSBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 482ab26458aSBarry Smith #endif 483ab26458aSBarry Smith else { 484ab26458aSBarry Smith if (mat->was_assembled) { 485ab26458aSBarry Smith if (!baij->colmap) { 486ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 487ab26458aSBarry Smith } 488a5eb4965SSatish Balay 489aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 490aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 491fa46199cSSatish Balay { int data; 4920f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 493*29bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 494fa46199cSSatish Balay } 49548e59246SSatish Balay #else 496*29bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 497a5eb4965SSatish Balay #endif 49848e59246SSatish Balay #endif 499aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 5000f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 501fa46199cSSatish Balay col = (col - 1)/bs; 50248e59246SSatish Balay #else 503a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 50448e59246SSatish Balay #endif 505ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 506ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 507ab26458aSBarry Smith col = in[j]; 508ab26458aSBarry Smith } 509ab26458aSBarry Smith } 510ab26458aSBarry Smith else col = in[j]; 51193fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 512ab26458aSBarry Smith } 513ab26458aSBarry Smith } 514d64ed03dSBarry Smith } else { 515ab26458aSBarry Smith if (!baij->donotstash) { 516ff2fd236SBarry Smith if (roworiented) { 5176fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 518ff2fd236SBarry Smith } else { 5196fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 520ff2fd236SBarry Smith } 521abef11f7SSatish Balay } 522ab26458aSBarry Smith } 523ab26458aSBarry Smith } 5243a40ed3dSBarry Smith PetscFunctionReturn(0); 525ab26458aSBarry Smith } 5266fa18ffdSBarry Smith 5270bdbc534SSatish Balay #define HASH_KEY 0.6180339887 528c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 5296fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 530c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 5315615d1e5SSatish Balay #undef __FUNC__ 5326fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar" 5336fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 5340bdbc534SSatish Balay { 5350bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 5360bdbc534SSatish Balay int ierr,i,j,row,col; 5370bdbc534SSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 538c2760754SSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 539c2760754SSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 540329f5518SBarry Smith PetscReal tmp; 5413eda8832SBarry Smith MatScalar **HD = baij->hd,value; 542aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 5434a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5444a15367fSSatish Balay #endif 5450bdbc534SSatish Balay 5460bdbc534SSatish Balay PetscFunctionBegin; 5470bdbc534SSatish Balay 5480bdbc534SSatish Balay for (i=0; i<m; i++) { 549aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 550*29bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 551*29bbc08cSBarry Smith if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 5520bdbc534SSatish Balay #endif 5530bdbc534SSatish Balay row = im[i]; 554c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5550bdbc534SSatish Balay for (j=0; j<n; j++) { 5560bdbc534SSatish Balay col = in[j]; 5576fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5580bdbc534SSatish Balay /* Look up into the Hash Table */ 559c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 560c2760754SSatish Balay h1 = HASH(size,key,tmp); 5610bdbc534SSatish Balay 562c2760754SSatish Balay 563c2760754SSatish Balay idx = h1; 564aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 565187ce0cbSSatish Balay insert_ct++; 566187ce0cbSSatish Balay total_ct++; 567187ce0cbSSatish Balay if (HT[idx] != key) { 568187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 569187ce0cbSSatish Balay if (idx == size) { 570187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 571187ce0cbSSatish Balay if (idx == h1) { 572*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 573187ce0cbSSatish Balay } 574187ce0cbSSatish Balay } 575187ce0cbSSatish Balay } 576187ce0cbSSatish Balay #else 577c2760754SSatish Balay if (HT[idx] != key) { 578c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 579c2760754SSatish Balay if (idx == size) { 580c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 581c2760754SSatish Balay if (idx == h1) { 582*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 583c2760754SSatish Balay } 584c2760754SSatish Balay } 585c2760754SSatish Balay } 586187ce0cbSSatish Balay #endif 587c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 588c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 589c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5900bdbc534SSatish Balay } 5910bdbc534SSatish Balay } else { 5920bdbc534SSatish Balay if (!baij->donotstash) { 593ff2fd236SBarry Smith if (roworiented) { 5948798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 595ff2fd236SBarry Smith } else { 5968798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5970bdbc534SSatish Balay } 5980bdbc534SSatish Balay } 5990bdbc534SSatish Balay } 6000bdbc534SSatish Balay } 601aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 602187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 603187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 604187ce0cbSSatish Balay #endif 6050bdbc534SSatish Balay PetscFunctionReturn(0); 6060bdbc534SSatish Balay } 6070bdbc534SSatish Balay 6080bdbc534SSatish Balay #undef __FUNC__ 6096fa18ffdSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 6106fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 6110bdbc534SSatish Balay { 6120bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6138798bf22SSatish Balay int ierr,i,j,ii,jj,row,col; 6140bdbc534SSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 615b4cc0f5aSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 616c2760754SSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 617329f5518SBarry Smith PetscReal tmp; 6183eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 6196fa18ffdSBarry Smith MatScalar *v_t,*value; 620aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 6214a15367fSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 6224a15367fSSatish Balay #endif 6230bdbc534SSatish Balay 624d0a41580SSatish Balay PetscFunctionBegin; 625d0a41580SSatish Balay 6260bdbc534SSatish Balay if (roworiented) { 6270bdbc534SSatish Balay stepval = (n-1)*bs; 6280bdbc534SSatish Balay } else { 6290bdbc534SSatish Balay stepval = (m-1)*bs; 6300bdbc534SSatish Balay } 6310bdbc534SSatish Balay for (i=0; i<m; i++) { 632aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 633*29bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 634*29bbc08cSBarry Smith if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 6350bdbc534SSatish Balay #endif 6360bdbc534SSatish Balay row = im[i]; 637187ce0cbSSatish Balay v_t = v + i*bs2; 638c2760754SSatish Balay if (row >= rstart && row < rend) { 6390bdbc534SSatish Balay for (j=0; j<n; j++) { 6400bdbc534SSatish Balay col = in[j]; 6410bdbc534SSatish Balay 6420bdbc534SSatish Balay /* Look up into the Hash Table */ 643c2760754SSatish Balay key = row*Nbs+col+1; 644c2760754SSatish Balay h1 = HASH(size,key,tmp); 6450bdbc534SSatish Balay 646c2760754SSatish Balay idx = h1; 647aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 648187ce0cbSSatish Balay total_ct++; 649187ce0cbSSatish Balay insert_ct++; 650187ce0cbSSatish Balay if (HT[idx] != key) { 651187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 652187ce0cbSSatish Balay if (idx == size) { 653187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 654187ce0cbSSatish Balay if (idx == h1) { 655*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 656187ce0cbSSatish Balay } 657187ce0cbSSatish Balay } 658187ce0cbSSatish Balay } 659187ce0cbSSatish Balay #else 660c2760754SSatish Balay if (HT[idx] != key) { 661c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 662c2760754SSatish Balay if (idx == size) { 663c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 664c2760754SSatish Balay if (idx == h1) { 665*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 666c2760754SSatish Balay } 667c2760754SSatish Balay } 668c2760754SSatish Balay } 669187ce0cbSSatish Balay #endif 670c2760754SSatish Balay baij_a = HD[idx]; 6710bdbc534SSatish Balay if (roworiented) { 672c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 673187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 674187ce0cbSSatish Balay value = v_t; 675187ce0cbSSatish Balay v_t += bs; 676fef45726SSatish Balay if (addv == ADD_VALUES) { 677c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 678c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 679fef45726SSatish Balay baij_a[jj] += *value++; 680b4cc0f5aSSatish Balay } 681b4cc0f5aSSatish Balay } 682fef45726SSatish Balay } else { 683c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 684c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 685fef45726SSatish Balay baij_a[jj] = *value++; 686fef45726SSatish Balay } 687fef45726SSatish Balay } 688fef45726SSatish Balay } 6890bdbc534SSatish Balay } else { 6900bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 691fef45726SSatish Balay if (addv == ADD_VALUES) { 692b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6930bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 694fef45726SSatish Balay baij_a[jj] += *value++; 695fef45726SSatish Balay } 696fef45726SSatish Balay } 697fef45726SSatish Balay } else { 698fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 699fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 700fef45726SSatish Balay baij_a[jj] = *value++; 701fef45726SSatish Balay } 702b4cc0f5aSSatish Balay } 7030bdbc534SSatish Balay } 7040bdbc534SSatish Balay } 7050bdbc534SSatish Balay } 7060bdbc534SSatish Balay } else { 7070bdbc534SSatish Balay if (!baij->donotstash) { 7080bdbc534SSatish Balay if (roworiented) { 7098798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7100bdbc534SSatish Balay } else { 7118798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7120bdbc534SSatish Balay } 7130bdbc534SSatish Balay } 7140bdbc534SSatish Balay } 7150bdbc534SSatish Balay } 716aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 717187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 718187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 719187ce0cbSSatish Balay #endif 7200bdbc534SSatish Balay PetscFunctionReturn(0); 7210bdbc534SSatish Balay } 722133cdb44SSatish Balay 7230bdbc534SSatish Balay #undef __FUNC__ 724b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 725ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 726d6de1c52SSatish Balay { 727d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 728d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 72948e59246SSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 730d6de1c52SSatish Balay 731133cdb44SSatish Balay PetscFunctionBegin; 732d6de1c52SSatish Balay for (i=0; i<m; i++) { 733*29bbc08cSBarry Smith if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 734*29bbc08cSBarry Smith if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 735d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 736d6de1c52SSatish Balay row = idxm[i] - bsrstart; 737d6de1c52SSatish Balay for (j=0; j<n; j++) { 738*29bbc08cSBarry Smith if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 739*29bbc08cSBarry Smith if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 740d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 741d6de1c52SSatish Balay col = idxn[j] - bscstart; 74298dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 743d64ed03dSBarry Smith } else { 744905e6a2fSBarry Smith if (!baij->colmap) { 745905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 746905e6a2fSBarry Smith } 747aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7480f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 749fa46199cSSatish Balay data --; 75048e59246SSatish Balay #else 75148e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 75248e59246SSatish Balay #endif 75348e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 754d9d09a02SSatish Balay else { 75548e59246SSatish Balay col = data + idxn[j]%bs; 75698dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 757d6de1c52SSatish Balay } 758d6de1c52SSatish Balay } 759d6de1c52SSatish Balay } 760d64ed03dSBarry Smith } else { 761*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 762d6de1c52SSatish Balay } 763d6de1c52SSatish Balay } 7643a40ed3dSBarry Smith PetscFunctionReturn(0); 765d6de1c52SSatish Balay } 766d6de1c52SSatish Balay 7675615d1e5SSatish Balay #undef __FUNC__ 768b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 769329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 770d6de1c52SSatish Balay { 771d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 772d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 773acdf5bf4SSatish Balay int ierr,i,bs2=baij->bs2; 774329f5518SBarry Smith PetscReal sum = 0.0; 7753eda8832SBarry Smith MatScalar *v; 776d6de1c52SSatish Balay 777d64ed03dSBarry Smith PetscFunctionBegin; 778d6de1c52SSatish Balay if (baij->size == 1) { 779d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 780d6de1c52SSatish Balay } else { 781d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 782d6de1c52SSatish Balay v = amat->a; 783d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 784aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 785329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 786d6de1c52SSatish Balay #else 787d6de1c52SSatish Balay sum += (*v)*(*v); v++; 788d6de1c52SSatish Balay #endif 789d6de1c52SSatish Balay } 790d6de1c52SSatish Balay v = bmat->a; 791d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 792aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 793329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 794d6de1c52SSatish Balay #else 795d6de1c52SSatish Balay sum += (*v)*(*v); v++; 796d6de1c52SSatish Balay #endif 797d6de1c52SSatish Balay } 798ca161407SBarry Smith ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 799d6de1c52SSatish Balay *norm = sqrt(*norm); 800d64ed03dSBarry Smith } else { 801*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 802d6de1c52SSatish Balay } 803d64ed03dSBarry Smith } 8043a40ed3dSBarry Smith PetscFunctionReturn(0); 805d6de1c52SSatish Balay } 80657b952d6SSatish Balay 807bd7f49f5SSatish Balay 808fef45726SSatish Balay /* 809fef45726SSatish Balay Creates the hash table, and sets the table 810fef45726SSatish Balay This table is created only once. 811fef45726SSatish Balay If new entried need to be added to the matrix 812fef45726SSatish Balay then the hash table has to be destroyed and 813fef45726SSatish Balay recreated. 814fef45726SSatish Balay */ 815fef45726SSatish Balay #undef __FUNC__ 816b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 817329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 818596b8d2eSBarry Smith { 819596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 820596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 821596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 8220bdbc534SSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 823549d3d68SSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 824187ce0cbSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 825fef45726SSatish Balay int *HT,key; 8263eda8832SBarry Smith MatScalar **HD; 827329f5518SBarry Smith PetscReal tmp; 828aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 8294a15367fSSatish Balay int ct=0,max=0; 8304a15367fSSatish Balay #endif 831fef45726SSatish Balay 832d64ed03dSBarry Smith PetscFunctionBegin; 8330bdbc534SSatish Balay baij->ht_size=(int)(factor*nz); 8340bdbc534SSatish Balay size = baij->ht_size; 835fef45726SSatish Balay 8360bdbc534SSatish Balay if (baij->ht) { 8370bdbc534SSatish Balay PetscFunctionReturn(0); 838596b8d2eSBarry Smith } 8390bdbc534SSatish Balay 840fef45726SSatish Balay /* Allocate Memory for Hash Table */ 8413eda8832SBarry Smith baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 842b9e4cc15SSatish Balay baij->ht = (int*)(baij->hd + size); 843b9e4cc15SSatish Balay HD = baij->hd; 844a07cd24cSSatish Balay HT = baij->ht; 845b9e4cc15SSatish Balay 846b9e4cc15SSatish Balay 847549d3d68SSatish Balay ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 8480bdbc534SSatish Balay 849596b8d2eSBarry Smith 850596b8d2eSBarry Smith /* Loop Over A */ 8510bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 852596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8530bdbc534SSatish Balay row = i+rstart; 8540bdbc534SSatish Balay col = aj[j]+cstart; 855596b8d2eSBarry Smith 856187ce0cbSSatish Balay key = row*Nbs + col + 1; 857c2760754SSatish Balay h1 = HASH(size,key,tmp); 8580bdbc534SSatish Balay for (k=0; k<size; k++){ 8590bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8600bdbc534SSatish Balay HT[(h1+k)%size] = key; 8610bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 862596b8d2eSBarry Smith break; 863aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 864187ce0cbSSatish Balay } else { 865187ce0cbSSatish Balay ct++; 866187ce0cbSSatish Balay #endif 867596b8d2eSBarry Smith } 868187ce0cbSSatish Balay } 869aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 870187ce0cbSSatish Balay if (k> max) max = k; 871187ce0cbSSatish Balay #endif 872596b8d2eSBarry Smith } 873596b8d2eSBarry Smith } 874596b8d2eSBarry Smith /* Loop Over B */ 8750bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 876596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 8770bdbc534SSatish Balay row = i+rstart; 8780bdbc534SSatish Balay col = garray[bj[j]]; 879187ce0cbSSatish Balay key = row*Nbs + col + 1; 880c2760754SSatish Balay h1 = HASH(size,key,tmp); 8810bdbc534SSatish Balay for (k=0; k<size; k++){ 8820bdbc534SSatish Balay if (HT[(h1+k)%size] == 0.0) { 8830bdbc534SSatish Balay HT[(h1+k)%size] = key; 8840bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 885596b8d2eSBarry Smith break; 886aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 887187ce0cbSSatish Balay } else { 888187ce0cbSSatish Balay ct++; 889187ce0cbSSatish Balay #endif 890596b8d2eSBarry Smith } 891187ce0cbSSatish Balay } 892aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 893187ce0cbSSatish Balay if (k> max) max = k; 894187ce0cbSSatish Balay #endif 895596b8d2eSBarry Smith } 896596b8d2eSBarry Smith } 897596b8d2eSBarry Smith 898596b8d2eSBarry Smith /* Print Summary */ 899aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 900c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 901596b8d2eSBarry Smith if (HT[i]) {j++;} 902c38d4ed2SBarry Smith } 903329f5518SBarry Smith PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 904187ce0cbSSatish Balay #endif 9053a40ed3dSBarry Smith PetscFunctionReturn(0); 906596b8d2eSBarry Smith } 90757b952d6SSatish Balay 908bbb85fb3SSatish Balay #undef __FUNC__ 909b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 910bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 911bbb85fb3SSatish Balay { 912bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 913ff2fd236SBarry Smith int ierr,nstash,reallocs; 914bbb85fb3SSatish Balay InsertMode addv; 915bbb85fb3SSatish Balay 916bbb85fb3SSatish Balay PetscFunctionBegin; 917bbb85fb3SSatish Balay if (baij->donotstash) { 918bbb85fb3SSatish Balay PetscFunctionReturn(0); 919bbb85fb3SSatish Balay } 920bbb85fb3SSatish Balay 921bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 922bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 923bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 924*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 925bbb85fb3SSatish Balay } 926bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 927bbb85fb3SSatish Balay 9288798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9298798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9308798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 9315a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 9328798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 9335a655dc6SBarry Smith PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 934bbb85fb3SSatish Balay PetscFunctionReturn(0); 935bbb85fb3SSatish Balay } 936bbb85fb3SSatish Balay 937bbb85fb3SSatish Balay #undef __FUNC__ 938b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 939bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 940bbb85fb3SSatish Balay { 941bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 942a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 943a2d1c673SSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 9447c922b88SBarry Smith int *row,*col,other_disassembled; 9457c922b88SBarry Smith PetscTruth r1,r2,r3; 9463eda8832SBarry Smith MatScalar *val; 947bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 948bbb85fb3SSatish Balay 949bbb85fb3SSatish Balay PetscFunctionBegin; 950bbb85fb3SSatish Balay if (!baij->donotstash) { 951a2d1c673SSatish Balay while (1) { 9528798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 953a2d1c673SSatish Balay if (!flg) break; 954a2d1c673SSatish Balay 955bbb85fb3SSatish Balay for (i=0; i<n;) { 956bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 957bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 958bbb85fb3SSatish Balay if (j < n) ncols = j-i; 959bbb85fb3SSatish Balay else ncols = n-i; 960bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 96193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 962bbb85fb3SSatish Balay i = j; 963bbb85fb3SSatish Balay } 964bbb85fb3SSatish Balay } 9658798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 966a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 967a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 968a2d1c673SSatish Balay restore the original flags */ 969a2d1c673SSatish Balay r1 = baij->roworiented; 970a2d1c673SSatish Balay r2 = a->roworiented; 971a2d1c673SSatish Balay r3 = b->roworiented; 9727c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 9737c922b88SBarry Smith a->roworiented = PETSC_FALSE; 9747c922b88SBarry Smith b->roworiented = PETSC_FALSE; 975a2d1c673SSatish Balay while (1) { 9768798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 977a2d1c673SSatish Balay if (!flg) break; 978a2d1c673SSatish Balay 979a2d1c673SSatish Balay for (i=0; i<n;) { 980a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 981a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 982a2d1c673SSatish Balay if (j < n) ncols = j-i; 983a2d1c673SSatish Balay else ncols = n-i; 98493fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 985a2d1c673SSatish Balay i = j; 986a2d1c673SSatish Balay } 987a2d1c673SSatish Balay } 9888798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 989a2d1c673SSatish Balay baij->roworiented = r1; 990a2d1c673SSatish Balay a->roworiented = r2; 991a2d1c673SSatish Balay b->roworiented = r3; 992bbb85fb3SSatish Balay } 993bbb85fb3SSatish Balay 994bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 995bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 996bbb85fb3SSatish Balay 997bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 998bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 999bbb85fb3SSatish Balay /* 1000bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1001bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1002bbb85fb3SSatish Balay */ 1003bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1004bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1005bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1006bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1007bbb85fb3SSatish Balay } 1008bbb85fb3SSatish Balay } 1009bbb85fb3SSatish Balay 1010bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1011bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1012bbb85fb3SSatish Balay } 1013bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1014bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1015bbb85fb3SSatish Balay 1016aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1017bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1018c38d4ed2SBarry Smith PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1019bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1020bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1021bbb85fb3SSatish Balay } 1022bbb85fb3SSatish Balay #endif 1023bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1024bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1025bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1026bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1027bbb85fb3SSatish Balay } 1028bbb85fb3SSatish Balay 1029606d414cSSatish Balay if (baij->rowvalues) { 1030606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1031606d414cSSatish Balay baij->rowvalues = 0; 1032606d414cSSatish Balay } 1033bbb85fb3SSatish Balay PetscFunctionReturn(0); 1034bbb85fb3SSatish Balay } 103557b952d6SSatish Balay 10365615d1e5SSatish Balay #undef __FUNC__ 1037b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 10387b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 103957b952d6SSatish Balay { 104057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 104177ed5343SBarry Smith int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 10426831982aSBarry Smith PetscTruth isascii,isdraw; 1043f14a1c24SBarry Smith Viewer sviewer; 104457b952d6SSatish Balay 1045d64ed03dSBarry Smith PetscFunctionBegin; 10466831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 10476831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 10480f5bd95cSBarry Smith if (isascii) { 1049d41123aaSBarry Smith ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1050639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 10514e220ebcSLois Curfman McInnes MatInfo info; 1052d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1053d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 10546831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 10554e220ebcSLois Curfman McInnes rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 10566831982aSBarry Smith baij->bs,(int)info.memory);CHKERRQ(ierr); 1057d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 10586831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1059d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 10606831982aSBarry Smith ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 10616831982aSBarry Smith ierr = ViewerFlush(viewer);CHKERRQ(ierr); 106257b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10633a40ed3dSBarry Smith PetscFunctionReturn(0); 1064d64ed03dSBarry Smith } else if (format == VIEWER_FORMAT_ASCII_INFO) { 10656831982aSBarry Smith ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 10663a40ed3dSBarry Smith PetscFunctionReturn(0); 106757b952d6SSatish Balay } 106857b952d6SSatish Balay } 106957b952d6SSatish Balay 10700f5bd95cSBarry Smith if (isdraw) { 107157b952d6SSatish Balay Draw draw; 107257b952d6SSatish Balay PetscTruth isnull; 107377ed5343SBarry Smith ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 10743a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 107557b952d6SSatish Balay } 107657b952d6SSatish Balay 107757b952d6SSatish Balay if (size == 1) { 107857b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1079d64ed03dSBarry Smith } else { 108057b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 108157b952d6SSatish Balay Mat A; 108257b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 10833eda8832SBarry Smith int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 10843eda8832SBarry Smith MatScalar *a; 108557b952d6SSatish Balay 108657b952d6SSatish Balay if (!rank) { 108755843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1088d64ed03dSBarry Smith } else { 108955843e3eSBarry Smith ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 109057b952d6SSatish Balay } 109157b952d6SSatish Balay PLogObjectParent(mat,A); 109257b952d6SSatish Balay 109357b952d6SSatish Balay /* copy over the A part */ 109457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 109557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 109657b952d6SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 109757b952d6SSatish Balay 109857b952d6SSatish Balay for (i=0; i<mbs; i++) { 109957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 110057b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 110157b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 110257b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 110357b952d6SSatish Balay for (k=0; k<bs; k++) { 110493fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1105cee3aa6bSSatish Balay col++; a += bs; 110657b952d6SSatish Balay } 110757b952d6SSatish Balay } 110857b952d6SSatish Balay } 110957b952d6SSatish Balay /* copy over the B part */ 111057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 111157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 111257b952d6SSatish Balay for (i=0; i<mbs; i++) { 111357b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 111457b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 111557b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 111657b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 111757b952d6SSatish Balay for (k=0; k<bs; k++) { 111893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1119cee3aa6bSSatish Balay col++; a += bs; 112057b952d6SSatish Balay } 112157b952d6SSatish Balay } 112257b952d6SSatish Balay } 1123606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11246d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11256d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112655843e3eSBarry Smith /* 112755843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 112855843e3eSBarry Smith synchronized across all processors that share the Draw object 112955843e3eSBarry Smith */ 11306831982aSBarry Smith ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1131f14a1c24SBarry Smith if (!rank) { 11326831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 113357b952d6SSatish Balay } 1134f14a1c24SBarry Smith ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 113557b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 113657b952d6SSatish Balay } 11373a40ed3dSBarry Smith PetscFunctionReturn(0); 113857b952d6SSatish Balay } 113957b952d6SSatish Balay 11405615d1e5SSatish Balay #undef __FUNC__ 1141b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1142e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer) 114357b952d6SSatish Balay { 114457b952d6SSatish Balay int ierr; 11456831982aSBarry Smith PetscTruth isascii,isdraw,issocket,isbinary; 114657b952d6SSatish Balay 1147d64ed03dSBarry Smith PetscFunctionBegin; 11486831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 11496831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 11506831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 11516831982aSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 11520f5bd95cSBarry Smith if (isascii || isdraw || issocket || isbinary) { 11537b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 11545cd90555SBarry Smith } else { 1155*29bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 115657b952d6SSatish Balay } 11573a40ed3dSBarry Smith PetscFunctionReturn(0); 115857b952d6SSatish Balay } 115957b952d6SSatish Balay 11605615d1e5SSatish Balay #undef __FUNC__ 1161b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1162e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat) 116379bdfe76SSatish Balay { 116479bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 116579bdfe76SSatish Balay int ierr; 116679bdfe76SSatish Balay 1167d64ed03dSBarry Smith PetscFunctionBegin; 116898dd23e9SBarry Smith 116998dd23e9SBarry Smith if (mat->mapping) { 117098dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 117198dd23e9SBarry Smith } 117298dd23e9SBarry Smith if (mat->bmapping) { 117398dd23e9SBarry Smith ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 117498dd23e9SBarry Smith } 117561b13de0SBarry Smith if (mat->rmap) { 117661b13de0SBarry Smith ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 117761b13de0SBarry Smith } 117861b13de0SBarry Smith if (mat->cmap) { 117961b13de0SBarry Smith ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 118061b13de0SBarry Smith } 1181aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1182e1311b90SBarry Smith PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 118379bdfe76SSatish Balay #endif 118479bdfe76SSatish Balay 11858798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 11868798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 11878798bf22SSatish Balay 1188606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 118979bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 119079bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1191aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 11920f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 119348e59246SSatish Balay #else 1194606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 119548e59246SSatish Balay #endif 1196606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1197606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1198606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1199606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1200606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1201606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 12026fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 12036fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 12046fa18ffdSBarry Smith #endif 1205606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 120679bdfe76SSatish Balay PLogObjectDestroy(mat); 120779bdfe76SSatish Balay PetscHeaderDestroy(mat); 12083a40ed3dSBarry Smith PetscFunctionReturn(0); 120979bdfe76SSatish Balay } 121079bdfe76SSatish Balay 12115615d1e5SSatish Balay #undef __FUNC__ 1212b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1213ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1214cee3aa6bSSatish Balay { 1215cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 121647b4a8eaSLois Curfman McInnes int ierr,nt; 1217cee3aa6bSSatish Balay 1218d64ed03dSBarry Smith PetscFunctionBegin; 1219e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 122047b4a8eaSLois Curfman McInnes if (nt != a->n) { 1221*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 122247b4a8eaSLois Curfman McInnes } 1223e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 122447b4a8eaSLois Curfman McInnes if (nt != a->m) { 1225*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 122647b4a8eaSLois Curfman McInnes } 122743a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1228f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 122943a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1230f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 123143a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12323a40ed3dSBarry Smith PetscFunctionReturn(0); 1233cee3aa6bSSatish Balay } 1234cee3aa6bSSatish Balay 12355615d1e5SSatish Balay #undef __FUNC__ 1236b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1237ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1238cee3aa6bSSatish Balay { 1239cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1240cee3aa6bSSatish Balay int ierr; 1241d64ed03dSBarry Smith 1242d64ed03dSBarry Smith PetscFunctionBegin; 124343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1244f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 124543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1246f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12473a40ed3dSBarry Smith PetscFunctionReturn(0); 1248cee3aa6bSSatish Balay } 1249cee3aa6bSSatish Balay 12505615d1e5SSatish Balay #undef __FUNC__ 1251b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 12527c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1253cee3aa6bSSatish Balay { 1254cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1255cee3aa6bSSatish Balay int ierr; 1256cee3aa6bSSatish Balay 1257d64ed03dSBarry Smith PetscFunctionBegin; 1258cee3aa6bSSatish Balay /* do nondiagonal part */ 12597c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1260cee3aa6bSSatish Balay /* send it on its way */ 1261537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1262cee3aa6bSSatish Balay /* do local part */ 12637c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1264cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1265cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1266cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1267639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12683a40ed3dSBarry Smith PetscFunctionReturn(0); 1269cee3aa6bSSatish Balay } 1270cee3aa6bSSatish Balay 12715615d1e5SSatish Balay #undef __FUNC__ 1272b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 12737c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1274cee3aa6bSSatish Balay { 1275cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1276cee3aa6bSSatish Balay int ierr; 1277cee3aa6bSSatish Balay 1278d64ed03dSBarry Smith PetscFunctionBegin; 1279cee3aa6bSSatish Balay /* do nondiagonal part */ 12807c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1281cee3aa6bSSatish Balay /* send it on its way */ 1282537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1283cee3aa6bSSatish Balay /* do local part */ 12847c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1285cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1286cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1287cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1288537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 12893a40ed3dSBarry Smith PetscFunctionReturn(0); 1290cee3aa6bSSatish Balay } 1291cee3aa6bSSatish Balay 1292cee3aa6bSSatish Balay /* 1293cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1294cee3aa6bSSatish Balay diagonal block 1295cee3aa6bSSatish Balay */ 12965615d1e5SSatish Balay #undef __FUNC__ 1297b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1298ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1299cee3aa6bSSatish Balay { 1300cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 13013a40ed3dSBarry Smith int ierr; 1302d64ed03dSBarry Smith 1303d64ed03dSBarry Smith PetscFunctionBegin; 1304*29bbc08cSBarry Smith if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13053a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13063a40ed3dSBarry Smith PetscFunctionReturn(0); 1307cee3aa6bSSatish Balay } 1308cee3aa6bSSatish Balay 13095615d1e5SSatish Balay #undef __FUNC__ 1310b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1311ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1312cee3aa6bSSatish Balay { 1313cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1314cee3aa6bSSatish Balay int ierr; 1315d64ed03dSBarry Smith 1316d64ed03dSBarry Smith PetscFunctionBegin; 1317cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1318cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 13193a40ed3dSBarry Smith PetscFunctionReturn(0); 1320cee3aa6bSSatish Balay } 1321026e39d0SSatish Balay 13225615d1e5SSatish Balay #undef __FUNC__ 1323b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ" 1324ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 132557b952d6SSatish Balay { 132657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1327d64ed03dSBarry Smith 1328d64ed03dSBarry Smith PetscFunctionBegin; 1329bd7f49f5SSatish Balay if (m) *m = mat->M; 1330bd7f49f5SSatish Balay if (n) *n = mat->N; 13313a40ed3dSBarry Smith PetscFunctionReturn(0); 133257b952d6SSatish Balay } 133357b952d6SSatish Balay 13345615d1e5SSatish Balay #undef __FUNC__ 1335b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ" 1336ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 133757b952d6SSatish Balay { 133857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1339d64ed03dSBarry Smith 1340d64ed03dSBarry Smith PetscFunctionBegin; 1341f830108cSBarry Smith *m = mat->m; *n = mat->n; 13423a40ed3dSBarry Smith PetscFunctionReturn(0); 134357b952d6SSatish Balay } 134457b952d6SSatish Balay 13455615d1e5SSatish Balay #undef __FUNC__ 1346b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1347ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 134857b952d6SSatish Balay { 134957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1350d64ed03dSBarry Smith 1351d64ed03dSBarry Smith PetscFunctionBegin; 1352a21fb8cbSBarry Smith if (m) *m = mat->rstart*mat->bs; 1353a21fb8cbSBarry Smith if (n) *n = mat->rend*mat->bs; 13543a40ed3dSBarry Smith PetscFunctionReturn(0); 135557b952d6SSatish Balay } 135657b952d6SSatish Balay 13575615d1e5SSatish Balay #undef __FUNC__ 1358b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1359acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1360acdf5bf4SSatish Balay { 1361acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1362acdf5bf4SSatish Balay Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1363acdf5bf4SSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1364d9d09a02SSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1365d9d09a02SSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1366acdf5bf4SSatish Balay 1367d64ed03dSBarry Smith PetscFunctionBegin; 1368*29bbc08cSBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1369acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1370acdf5bf4SSatish Balay 1371acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1372acdf5bf4SSatish Balay /* 1373acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1374acdf5bf4SSatish Balay */ 1375acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1376bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1377bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1378acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1379acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1380acdf5bf4SSatish Balay } 1381549d3d68SSatish Balay mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1382acdf5bf4SSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1383acdf5bf4SSatish Balay } 1384acdf5bf4SSatish Balay 1385*29bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1386d9d09a02SSatish Balay lrow = row - brstart; 1387acdf5bf4SSatish Balay 1388acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1389acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1390acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1391f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1392f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1393acdf5bf4SSatish Balay nztot = nzA + nzB; 1394acdf5bf4SSatish Balay 1395acdf5bf4SSatish Balay cmap = mat->garray; 1396acdf5bf4SSatish Balay if (v || idx) { 1397acdf5bf4SSatish Balay if (nztot) { 1398acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1399acdf5bf4SSatish Balay int imark = -1; 1400acdf5bf4SSatish Balay if (v) { 1401acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1402acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1403d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1404acdf5bf4SSatish Balay else break; 1405acdf5bf4SSatish Balay } 1406acdf5bf4SSatish Balay imark = i; 1407acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1408acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1409acdf5bf4SSatish Balay } 1410acdf5bf4SSatish Balay if (idx) { 1411acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1412acdf5bf4SSatish Balay if (imark > -1) { 1413acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1414bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1415acdf5bf4SSatish Balay } 1416acdf5bf4SSatish Balay } else { 1417acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1418d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1419d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1420acdf5bf4SSatish Balay else break; 1421acdf5bf4SSatish Balay } 1422acdf5bf4SSatish Balay imark = i; 1423acdf5bf4SSatish Balay } 1424d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1425d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1426acdf5bf4SSatish Balay } 1427d64ed03dSBarry Smith } else { 1428d212a18eSSatish Balay if (idx) *idx = 0; 1429d212a18eSSatish Balay if (v) *v = 0; 1430d212a18eSSatish Balay } 1431acdf5bf4SSatish Balay } 1432acdf5bf4SSatish Balay *nz = nztot; 1433f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1434f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14353a40ed3dSBarry Smith PetscFunctionReturn(0); 1436acdf5bf4SSatish Balay } 1437acdf5bf4SSatish Balay 14385615d1e5SSatish Balay #undef __FUNC__ 1439b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1440acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1441acdf5bf4SSatish Balay { 1442acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1443d64ed03dSBarry Smith 1444d64ed03dSBarry Smith PetscFunctionBegin; 1445acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1446*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1447acdf5bf4SSatish Balay } 1448acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14493a40ed3dSBarry Smith PetscFunctionReturn(0); 1450acdf5bf4SSatish Balay } 1451acdf5bf4SSatish Balay 14525615d1e5SSatish Balay #undef __FUNC__ 1453b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1454ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 14555a838052SSatish Balay { 14565a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1457d64ed03dSBarry Smith 1458d64ed03dSBarry Smith PetscFunctionBegin; 14595a838052SSatish Balay *bs = baij->bs; 14603a40ed3dSBarry Smith PetscFunctionReturn(0); 14615a838052SSatish Balay } 14625a838052SSatish Balay 14635615d1e5SSatish Balay #undef __FUNC__ 1464b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1465ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A) 146658667388SSatish Balay { 146758667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 146858667388SSatish Balay int ierr; 1469d64ed03dSBarry Smith 1470d64ed03dSBarry Smith PetscFunctionBegin; 147158667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 147258667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14733a40ed3dSBarry Smith PetscFunctionReturn(0); 147458667388SSatish Balay } 14750ac07820SSatish Balay 14765615d1e5SSatish Balay #undef __FUNC__ 1477b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1478ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14790ac07820SSatish Balay { 14804e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14814e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 14827d57db60SLois Curfman McInnes int ierr; 1483329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14840ac07820SSatish Balay 1485d64ed03dSBarry Smith PetscFunctionBegin; 14864e220ebcSLois Curfman McInnes info->block_size = (double)a->bs; 14874e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14880e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1489de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14904e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14910e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1492de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14930ac07820SSatish Balay if (flag == MAT_LOCAL) { 14944e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14954e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14964e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14974e220ebcSLois Curfman McInnes info->memory = isend[3]; 14984e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14990ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1500f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 15014e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15024e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15034e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15044e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15054e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15060ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1507f7cdd7c9SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 15084e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15094e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15104e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15114e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15124e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1513d41123aaSBarry Smith } else { 1514*29bbc08cSBarry Smith SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 15150ac07820SSatish Balay } 15165a5d4f66SBarry Smith info->rows_global = (double)a->M; 15175a5d4f66SBarry Smith info->columns_global = (double)a->N; 15185a5d4f66SBarry Smith info->rows_local = (double)a->m; 15195a5d4f66SBarry Smith info->columns_local = (double)a->N; 15204e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15214e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15224e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15233a40ed3dSBarry Smith PetscFunctionReturn(0); 15240ac07820SSatish Balay } 15250ac07820SSatish Balay 15265615d1e5SSatish Balay #undef __FUNC__ 1527b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1528ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op) 152958667388SSatish Balay { 153058667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 153198305bb5SBarry Smith int ierr; 153258667388SSatish Balay 1533d64ed03dSBarry Smith PetscFunctionBegin; 153458667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 153558667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 15366da5968aSLois Curfman McInnes op == MAT_COLUMNS_UNSORTED || 1537c2653b3dSLois Curfman McInnes op == MAT_COLUMNS_SORTED || 15384787f768SSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 15397c922b88SBarry Smith op == MAT_KEEP_ZEROED_ROWS || 15404787f768SSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 154198305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154298305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1543b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROW_ORIENTED) { 15447c922b88SBarry Smith a->roworiented = PETSC_TRUE; 154598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1547b1fbbac0SLois Curfman McInnes } else if (op == MAT_ROWS_SORTED || 15486da5968aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 154958667388SSatish Balay op == MAT_SYMMETRIC || 155058667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1551b51ba29fSSatish Balay op == MAT_YES_NEW_DIAGONALS || 155298305bb5SBarry Smith op == MAT_USE_HASH_TABLE) { 155358667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 155498305bb5SBarry Smith } else if (op == MAT_COLUMN_ORIENTED) { 15557c922b88SBarry Smith a->roworiented = PETSC_FALSE; 155698305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155798305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 15582b362799SSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 15597c922b88SBarry Smith a->donotstash = PETSC_TRUE; 1560d64ed03dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 1561*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1562133cdb44SSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 15637c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 1564d64ed03dSBarry Smith } else { 1565*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 1566d64ed03dSBarry Smith } 15673a40ed3dSBarry Smith PetscFunctionReturn(0); 156858667388SSatish Balay } 156958667388SSatish Balay 15705615d1e5SSatish Balay #undef __FUNC__ 1571b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1572ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15730ac07820SSatish Balay { 15740ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15750ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15760ac07820SSatish Balay Mat B; 157740011551SBarry Smith int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 15780ac07820SSatish Balay int bs=baij->bs,mbs=baij->mbs; 15793eda8832SBarry Smith MatScalar *a; 15800ac07820SSatish Balay 1581d64ed03dSBarry Smith PetscFunctionBegin; 1582*29bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 15837c922b88SBarry Smith ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 15840ac07820SSatish Balay 15850ac07820SSatish Balay /* copy over the A part */ 15860ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15870ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15880ac07820SSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 15890ac07820SSatish Balay 15900ac07820SSatish Balay for (i=0; i<mbs; i++) { 15910ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 15920ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 15930ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15940ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 15950ac07820SSatish Balay for (k=0; k<bs; k++) { 159693fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 15970ac07820SSatish Balay col++; a += bs; 15980ac07820SSatish Balay } 15990ac07820SSatish Balay } 16000ac07820SSatish Balay } 16010ac07820SSatish Balay /* copy over the B part */ 16020ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16030ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16040ac07820SSatish Balay for (i=0; i<mbs; i++) { 16050ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16060ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16070ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16080ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16090ac07820SSatish Balay for (k=0; k<bs; k++) { 161093fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16110ac07820SSatish Balay col++; a += bs; 16120ac07820SSatish Balay } 16130ac07820SSatish Balay } 16140ac07820SSatish Balay } 1615606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16160ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16170ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16180ac07820SSatish Balay 16197c922b88SBarry Smith if (matout) { 16200ac07820SSatish Balay *matout = B; 16210ac07820SSatish Balay } else { 1622f830108cSBarry Smith PetscOps *Abops; 1623cc2dc46cSBarry Smith MatOps Aops; 1624f830108cSBarry Smith 16250ac07820SSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 1626606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 16270ac07820SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 16280ac07820SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1629aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 16300f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1631b1fc9764SSatish Balay #else 1632606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1633b1fc9764SSatish Balay #endif 1634606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1635606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1636606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1637606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1638f830108cSBarry Smith 1639f830108cSBarry Smith /* 1640f830108cSBarry Smith This is horrible, horrible code. We need to keep the 1641f830108cSBarry Smith A pointers for the bops and ops but copy everything 1642f830108cSBarry Smith else from C. 1643f830108cSBarry Smith */ 1644f830108cSBarry Smith Abops = A->bops; 1645f830108cSBarry Smith Aops = A->ops; 1646549d3d68SSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1647f830108cSBarry Smith A->bops = Abops; 1648f830108cSBarry Smith A->ops = Aops; 1649f830108cSBarry Smith 16500ac07820SSatish Balay PetscHeaderDestroy(B); 16510ac07820SSatish Balay } 16523a40ed3dSBarry Smith PetscFunctionReturn(0); 16530ac07820SSatish Balay } 16540e95ebc0SSatish Balay 16555615d1e5SSatish Balay #undef __FUNC__ 1656b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 165736c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16580e95ebc0SSatish Balay { 165936c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 166036c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 16610e95ebc0SSatish Balay int ierr,s1,s2,s3; 16620e95ebc0SSatish Balay 1663d64ed03dSBarry Smith PetscFunctionBegin; 166436c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 166536c4a09eSSatish Balay if (rr) { 166636c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1667*29bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 166836c4a09eSSatish Balay /* Overlap communication with computation. */ 166936c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 167036c4a09eSSatish Balay } 16710e95ebc0SSatish Balay if (ll) { 16720e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1673*29bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1674a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16750e95ebc0SSatish Balay } 167636c4a09eSSatish Balay /* scale the diagonal block */ 167736c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 167836c4a09eSSatish Balay 167936c4a09eSSatish Balay if (rr) { 168036c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 168136c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1682a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 168336c4a09eSSatish Balay } 168436c4a09eSSatish Balay 16853a40ed3dSBarry Smith PetscFunctionReturn(0); 16860e95ebc0SSatish Balay } 16870e95ebc0SSatish Balay 16885615d1e5SSatish Balay #undef __FUNC__ 1689b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1690ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 16910ac07820SSatish Balay { 16920ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16930ac07820SSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1694a07cd24cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 16950ac07820SSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 16960ac07820SSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1697a07cd24cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 16980ac07820SSatish Balay MPI_Comm comm = A->comm; 16990ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17000ac07820SSatish Balay MPI_Status recv_status,*send_status; 17010ac07820SSatish Balay IS istmp; 17020ac07820SSatish Balay 1703d64ed03dSBarry Smith PetscFunctionBegin; 1704f14a1c24SBarry Smith ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 17050ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 17060ac07820SSatish Balay 17070ac07820SSatish Balay /* first count number of contributors to each processor */ 17080ac07820SSatish Balay nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1709549d3d68SSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1710549d3d68SSatish Balay procs = nprocs + size; 17110ac07820SSatish Balay owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 17120ac07820SSatish Balay for (i=0; i<N; i++) { 17130ac07820SSatish Balay idx = rows[i]; 17140ac07820SSatish Balay found = 0; 17150ac07820SSatish Balay for (j=0; j<size; j++) { 17160ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 17170ac07820SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 17180ac07820SSatish Balay } 17190ac07820SSatish Balay } 1720*29bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 17210ac07820SSatish Balay } 17220ac07820SSatish Balay nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 17230ac07820SSatish Balay 17240ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 17256831982aSBarry Smith work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 17266831982aSBarry Smith ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 17270ac07820SSatish Balay nmax = work[rank]; 17286831982aSBarry Smith nrecvs = work[size+rank]; 1729606d414cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 17300ac07820SSatish Balay 17310ac07820SSatish Balay /* post receives: */ 1732d64ed03dSBarry Smith rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1733d64ed03dSBarry Smith recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 17340ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1735ca161407SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17360ac07820SSatish Balay } 17370ac07820SSatish Balay 17380ac07820SSatish Balay /* do sends: 17390ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17400ac07820SSatish Balay the ith processor 17410ac07820SSatish Balay */ 17420ac07820SSatish Balay svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1743ca161407SBarry Smith send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 17440ac07820SSatish Balay starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 17450ac07820SSatish Balay starts[0] = 0; 17460ac07820SSatish Balay for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 17470ac07820SSatish Balay for (i=0; i<N; i++) { 17480ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17490ac07820SSatish Balay } 17506831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17510ac07820SSatish Balay 17520ac07820SSatish Balay starts[0] = 0; 17530ac07820SSatish Balay for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 17540ac07820SSatish Balay count = 0; 17550ac07820SSatish Balay for (i=0; i<size; i++) { 17560ac07820SSatish Balay if (procs[i]) { 1757ca161407SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17580ac07820SSatish Balay } 17590ac07820SSatish Balay } 1760606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17610ac07820SSatish Balay 17620ac07820SSatish Balay base = owners[rank]*bs; 17630ac07820SSatish Balay 17640ac07820SSatish Balay /* wait on receives */ 17650ac07820SSatish Balay lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 17660ac07820SSatish Balay source = lens + nrecvs; 17670ac07820SSatish Balay count = nrecvs; slen = 0; 17680ac07820SSatish Balay while (count) { 1769ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17700ac07820SSatish Balay /* unpack receives into our local space */ 1771ca161407SBarry Smith ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 17720ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17730ac07820SSatish Balay lens[imdex] = n; 17740ac07820SSatish Balay slen += n; 17750ac07820SSatish Balay count--; 17760ac07820SSatish Balay } 1777606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17780ac07820SSatish Balay 17790ac07820SSatish Balay /* move the data into the send scatter */ 17800ac07820SSatish Balay lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 17810ac07820SSatish Balay count = 0; 17820ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17830ac07820SSatish Balay values = rvalues + i*nmax; 17840ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17850ac07820SSatish Balay lrows[count++] = values[j] - base; 17860ac07820SSatish Balay } 17870ac07820SSatish Balay } 1788606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1789606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1790606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1791606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17920ac07820SSatish Balay 17930ac07820SSatish Balay /* actually zap the local rows */ 1794029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 17950ac07820SSatish Balay PLogObjectParent(A,istmp); 1796a07cd24cSSatish Balay 179772dacd9aSBarry Smith /* 179872dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 179972dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 180072dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 180172dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 180272dacd9aSBarry Smith 180372dacd9aSBarry Smith Contributed by: Mathew Knepley 180472dacd9aSBarry Smith */ 18059c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 18066fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 18079c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 18086fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 18099c957beeSSatish Balay } else if (diag) { 18106fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1811fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1812*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1813fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18146525c446SSatish Balay } 1815a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1816a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 18173f11ea53SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1818a07cd24cSSatish Balay } 1819a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18219c957beeSSatish Balay } else { 18226fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1823a07cd24cSSatish Balay } 18249c957beeSSatish Balay 18259c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1826606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1827a07cd24cSSatish Balay 18280ac07820SSatish Balay /* wait on sends */ 18290ac07820SSatish Balay if (nsends) { 1830d64ed03dSBarry Smith send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1831ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1832606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18330ac07820SSatish Balay } 1834606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1835606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18360ac07820SSatish Balay 18373a40ed3dSBarry Smith PetscFunctionReturn(0); 18380ac07820SSatish Balay } 183972dacd9aSBarry Smith 18405615d1e5SSatish Balay #undef __FUNC__ 1841b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1842ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A) 1843ba4ca20aSSatish Balay { 1844ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 184525fdafccSSatish Balay MPI_Comm comm = A->comm; 1846133cdb44SSatish Balay static int called = 0; 18473a40ed3dSBarry Smith int ierr; 1848ba4ca20aSSatish Balay 1849d64ed03dSBarry Smith PetscFunctionBegin; 18503a40ed3dSBarry Smith if (!a->rank) { 18513a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 185225fdafccSSatish Balay } 185325fdafccSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1854d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1855d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18563a40ed3dSBarry Smith PetscFunctionReturn(0); 1857ba4ca20aSSatish Balay } 18580ac07820SSatish Balay 18595615d1e5SSatish Balay #undef __FUNC__ 1860b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1861ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A) 1862bb5a7306SBarry Smith { 1863bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1864bb5a7306SBarry Smith int ierr; 1865d64ed03dSBarry Smith 1866d64ed03dSBarry Smith PetscFunctionBegin; 1867bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18683a40ed3dSBarry Smith PetscFunctionReturn(0); 1869bb5a7306SBarry Smith } 1870bb5a7306SBarry Smith 18712e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18720ac07820SSatish Balay 18737fc3c18eSBarry Smith #undef __FUNC__ 1874b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 18757fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18767fc3c18eSBarry Smith { 18777fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18787fc3c18eSBarry Smith Mat a,b,c,d; 18797fc3c18eSBarry Smith PetscTruth flg; 18807fc3c18eSBarry Smith int ierr; 18817fc3c18eSBarry Smith 18827fc3c18eSBarry Smith PetscFunctionBegin; 1883*29bbc08cSBarry Smith if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 18847fc3c18eSBarry Smith a = matA->A; b = matA->B; 18857fc3c18eSBarry Smith c = matB->A; d = matB->B; 18867fc3c18eSBarry Smith 18877fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 18887fc3c18eSBarry Smith if (flg == PETSC_TRUE) { 18897fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18907fc3c18eSBarry Smith } 18917fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18927fc3c18eSBarry Smith PetscFunctionReturn(0); 18937fc3c18eSBarry Smith } 18947fc3c18eSBarry Smith 189579bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1896cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1897cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1898cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1899cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1900cc2dc46cSBarry Smith MatMult_MPIBAIJ, 1901cc2dc46cSBarry Smith MatMultAdd_MPIBAIJ, 19027c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19037c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1904cc2dc46cSBarry Smith 0, 1905cc2dc46cSBarry Smith 0, 1906cc2dc46cSBarry Smith 0, 1907cc2dc46cSBarry Smith 0, 1908cc2dc46cSBarry Smith 0, 1909cc2dc46cSBarry Smith 0, 1910cc2dc46cSBarry Smith 0, 1911cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 1912cc2dc46cSBarry Smith MatGetInfo_MPIBAIJ, 19137fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1914cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1915cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1916cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 1917cc2dc46cSBarry Smith MatAssemblyBegin_MPIBAIJ, 1918cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1919cc2dc46cSBarry Smith 0, 1920cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1921cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 1922cc2dc46cSBarry Smith MatZeroRows_MPIBAIJ, 1923cc2dc46cSBarry Smith 0, 1924cc2dc46cSBarry Smith 0, 1925cc2dc46cSBarry Smith 0, 1926cc2dc46cSBarry Smith 0, 1927cc2dc46cSBarry Smith MatGetSize_MPIBAIJ, 1928cc2dc46cSBarry Smith MatGetLocalSize_MPIBAIJ, 1929cc2dc46cSBarry Smith MatGetOwnershipRange_MPIBAIJ, 1930cc2dc46cSBarry Smith 0, 1931cc2dc46cSBarry Smith 0, 1932cc2dc46cSBarry Smith 0, 1933cc2dc46cSBarry Smith 0, 19342e8a6d31SBarry Smith MatDuplicate_MPIBAIJ, 1935cc2dc46cSBarry Smith 0, 1936cc2dc46cSBarry Smith 0, 1937cc2dc46cSBarry Smith 0, 1938cc2dc46cSBarry Smith 0, 1939cc2dc46cSBarry Smith 0, 1940cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1941cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1942cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1943cc2dc46cSBarry Smith 0, 1944cc2dc46cSBarry Smith MatPrintHelp_MPIBAIJ, 1945cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1946cc2dc46cSBarry Smith 0, 1947cc2dc46cSBarry Smith 0, 1948cc2dc46cSBarry Smith 0, 1949cc2dc46cSBarry Smith MatGetBlockSize_MPIBAIJ, 1950cc2dc46cSBarry Smith 0, 1951cc2dc46cSBarry Smith 0, 1952cc2dc46cSBarry Smith 0, 1953cc2dc46cSBarry Smith 0, 1954cc2dc46cSBarry Smith 0, 1955cc2dc46cSBarry Smith 0, 1956cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1957cc2dc46cSBarry Smith 0, 1958cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 1959cc2dc46cSBarry Smith 0, 1960f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 1961f14a1c24SBarry Smith MatView_MPIBAIJ, 1962cc2dc46cSBarry Smith MatGetMaps_Petsc}; 196379bdfe76SSatish Balay 19645ef9f2a5SBarry Smith 1965e18c124aSSatish Balay EXTERN_C_BEGIN 19665ef9f2a5SBarry Smith #undef __FUNC__ 1967b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 19685ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 19695ef9f2a5SBarry Smith { 19705ef9f2a5SBarry Smith PetscFunctionBegin; 19715ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 19725ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 19735ef9f2a5SBarry Smith PetscFunctionReturn(0); 19745ef9f2a5SBarry Smith } 1975e18c124aSSatish Balay EXTERN_C_END 197679bdfe76SSatish Balay 19775615d1e5SSatish Balay #undef __FUNC__ 1978b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 197979bdfe76SSatish Balay /*@C 198079bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 198179bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 198279bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 198379bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 198479bdfe76SSatish Balay performance can be increased by more than a factor of 50. 198579bdfe76SSatish Balay 1986db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1987db81eaa0SLois Curfman McInnes 198879bdfe76SSatish Balay Input Parameters: 1989db81eaa0SLois Curfman McInnes + comm - MPI communicator 199079bdfe76SSatish Balay . bs - size of blockk 199179bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 199292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 199392e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 199492e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 199592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 199692e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 1997be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1998be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 199979bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 200079bdfe76SSatish Balay submatrix (same for all local rows) 20017fc3c18eSBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 200292e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2003db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 200492e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 200579bdfe76SSatish Balay submatrix (same for all local rows). 20067fc3c18eSBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 200792e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 200892e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 200979bdfe76SSatish Balay 201079bdfe76SSatish Balay Output Parameter: 201179bdfe76SSatish Balay . A - the matrix 201279bdfe76SSatish Balay 2013db81eaa0SLois Curfman McInnes Options Database Keys: 2014db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2015db81eaa0SLois Curfman McInnes block calculations (much slower) 2016db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 2017494eafd4SBarry Smith . -mat_mpi - use the parallel matrix data structures even on one processor 2018494eafd4SBarry Smith (defaults to using SeqBAIJ format on one processor) 20193ffaccefSLois Curfman McInnes 2020b259b22eSLois Curfman McInnes Notes: 202179bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 202279bdfe76SSatish Balay (possibly both). 202379bdfe76SSatish Balay 2024be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2025be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2026be79a94dSBarry Smith 202779bdfe76SSatish Balay Storage Information: 202879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 202979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 203079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 203179bdfe76SSatish Balay local matrix (a rectangular submatrix). 203279bdfe76SSatish Balay 203379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 203479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 203579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 203679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 203779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 203879bdfe76SSatish Balay 203979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 204079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 204179bdfe76SSatish Balay 2042db81eaa0SLois Curfman McInnes .vb 2043db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2044db81eaa0SLois Curfman McInnes ------------------- 2045db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2046db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2047db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2048db81eaa0SLois Curfman McInnes ------------------- 2049db81eaa0SLois Curfman McInnes .ve 205079bdfe76SSatish Balay 205179bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 205279bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 205379bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 205457b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 205579bdfe76SSatish Balay 2056d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2057d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 205879bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 205992e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 206092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 20616da5968aSLois Curfman McInnes matrices. 206279bdfe76SSatish Balay 2063027ccd11SLois Curfman McInnes Level: intermediate 2064027ccd11SLois Curfman McInnes 206592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 206679bdfe76SSatish Balay 20673eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 206879bdfe76SSatish Balay @*/ 2069329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 207079bdfe76SSatish Balay { 207179bdfe76SSatish Balay Mat B; 207279bdfe76SSatish Balay Mat_MPIBAIJ *b; 2073f1af5d2fSBarry Smith int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2074f1af5d2fSBarry Smith PetscTruth flag1,flag2,flg; 207579bdfe76SSatish Balay 2076d64ed03dSBarry Smith PetscFunctionBegin; 2077962fb4a0SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2078962fb4a0SBarry Smith 2079*29bbc08cSBarry Smith if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2080*29bbc08cSBarry Smith if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -1: value %d",d_nz); 2081*29bbc08cSBarry Smith if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -1: value %d",o_nz); 20824fdb0a08SBarry Smith if (d_nnz) { 208336c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 2084*29bbc08cSBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 20854fdb0a08SBarry Smith } 20864fdb0a08SBarry Smith } 20874fdb0a08SBarry Smith if (o_nnz) { 208836c4a09eSSatish Balay for (i=0; i<m/bs; i++) { 2089*29bbc08cSBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 20904fdb0a08SBarry Smith } 20914fdb0a08SBarry Smith } 20923914022bSBarry Smith 2093d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2094494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2095494eafd4SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2096494eafd4SBarry Smith if (!flag1 && !flag2 && size == 1) { 20973914022bSBarry Smith if (M == PETSC_DECIDE) M = m; 20983914022bSBarry Smith if (N == PETSC_DECIDE) N = n; 20993914022bSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 21003a40ed3dSBarry Smith PetscFunctionReturn(0); 21013914022bSBarry Smith } 21023914022bSBarry Smith 210379bdfe76SSatish Balay *A = 0; 21043f1db9ecSBarry Smith PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 210579bdfe76SSatish Balay PLogObjectCreate(B); 210679bdfe76SSatish Balay B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2107549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2108549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 210990f02eecSBarry Smith B->mapping = 0; 211079bdfe76SSatish Balay B->factor = 0; 211179bdfe76SSatish Balay B->assembled = PETSC_FALSE; 211279bdfe76SSatish Balay 2113e0fa3b82SLois Curfman McInnes B->insertmode = NOT_SET_VALUES; 2114d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2115d132466eSBarry Smith ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 211679bdfe76SSatish Balay 21177c922b88SBarry Smith if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2118*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2119d64ed03dSBarry Smith } 2120a8c6a408SBarry Smith if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2121*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either M or m should be specified"); 2122a8c6a408SBarry Smith } 2123a8c6a408SBarry Smith if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2124*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either N or n should be specified"); 2125a8c6a408SBarry Smith } 2126cee3aa6bSSatish Balay if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2127cee3aa6bSSatish Balay if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 212879bdfe76SSatish Balay 212979bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 213079bdfe76SSatish Balay work[0] = m; work[1] = n; 213179bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 2132ca161407SBarry Smith ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 213379bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 213479bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 213579bdfe76SSatish Balay } 213679bdfe76SSatish Balay if (m == PETSC_DECIDE) { 213779bdfe76SSatish Balay Mbs = M/bs; 2138*29bbc08cSBarry Smith if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global rows must be divisible by blocksize"); 213979bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 214079bdfe76SSatish Balay m = mbs*bs; 214179bdfe76SSatish Balay } 214279bdfe76SSatish Balay if (n == PETSC_DECIDE) { 214379bdfe76SSatish Balay Nbs = N/bs; 2144*29bbc08cSBarry Smith if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global cols must be divisible by blocksize"); 214579bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 214679bdfe76SSatish Balay n = nbs*bs; 214779bdfe76SSatish Balay } 2148a8c6a408SBarry Smith if (mbs*bs != m || nbs*bs != n) { 2149*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows, cols must be divisible by blocksize"); 2150a8c6a408SBarry Smith } 215179bdfe76SSatish Balay 215279bdfe76SSatish Balay b->m = m; B->m = m; 215379bdfe76SSatish Balay b->n = n; B->n = n; 215479bdfe76SSatish Balay b->N = N; B->N = N; 215579bdfe76SSatish Balay b->M = M; B->M = M; 215679bdfe76SSatish Balay b->bs = bs; 215779bdfe76SSatish Balay b->bs2 = bs*bs; 215879bdfe76SSatish Balay b->mbs = mbs; 215979bdfe76SSatish Balay b->nbs = nbs; 216079bdfe76SSatish Balay b->Mbs = Mbs; 216179bdfe76SSatish Balay b->Nbs = Nbs; 216279bdfe76SSatish Balay 2163c7fcc2eaSBarry Smith /* the information in the maps duplicates the information computed below, eventually 2164c7fcc2eaSBarry Smith we should remove the duplicate information that is not contained in the maps */ 21657e2c5f70SBarry Smith ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 21667e2c5f70SBarry Smith ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr); 2167c7fcc2eaSBarry Smith 216879bdfe76SSatish Balay /* build local table of row and column ownerships */ 2169ff2fd236SBarry Smith b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2170ff2fd236SBarry Smith PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 21710ac07820SSatish Balay b->cowners = b->rowners + b->size + 2; 2172ff2fd236SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2173ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 217479bdfe76SSatish Balay b->rowners[0] = 0; 217579bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 217679bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 217779bdfe76SSatish Balay } 2178ff2fd236SBarry Smith for (i=0; i<=b->size; i++) { 2179ff2fd236SBarry Smith b->rowners_bs[i] = b->rowners[i]*bs; 2180ff2fd236SBarry Smith } 218179bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 218279bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 21834fa0d573SSatish Balay b->rstart_bs = b->rstart * bs; 21844fa0d573SSatish Balay b->rend_bs = b->rend * bs; 21854fa0d573SSatish Balay 2186ca161407SBarry Smith ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 218779bdfe76SSatish Balay b->cowners[0] = 0; 218879bdfe76SSatish Balay for (i=2; i<=b->size; i++) { 218979bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 219079bdfe76SSatish Balay } 219179bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 219279bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 21934fa0d573SSatish Balay b->cstart_bs = b->cstart * bs; 21944fa0d573SSatish Balay b->cend_bs = b->cend * bs; 219579bdfe76SSatish Balay 2196a07cd24cSSatish Balay 219779bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2198029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 219979bdfe76SSatish Balay PLogObjectParent(B,b->A); 220079bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2201029af93fSBarry Smith ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 220279bdfe76SSatish Balay PLogObjectParent(B,b->B); 220379bdfe76SSatish Balay 220479bdfe76SSatish Balay /* build cache for off array entries formed */ 22057e2c5f70SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 22067e2c5f70SBarry Smith ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 22077c922b88SBarry Smith b->donotstash = PETSC_FALSE; 22087c922b88SBarry Smith b->colmap = PETSC_NULL; 22097c922b88SBarry Smith b->garray = PETSC_NULL; 22107c922b88SBarry Smith b->roworiented = PETSC_TRUE; 221179bdfe76SSatish Balay 22126fa18ffdSBarry Smith #if defined(PEYSC_USE_MAT_SINGLE) 22136fa18ffdSBarry Smith /* stuff for MatSetValues_XXX in single precision */ 22146fa18ffdSBarry Smith b->lensetvalues = 0; 22156fa18ffdSBarry Smith b->setvaluescopy = PETSC_NULL; 22166fa18ffdSBarry Smith #endif 22176fa18ffdSBarry Smith 221830793edcSSatish Balay /* stuff used in block assembly */ 221930793edcSSatish Balay b->barray = 0; 222030793edcSSatish Balay 222179bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 222279bdfe76SSatish Balay b->lvec = 0; 222379bdfe76SSatish Balay b->Mvctx = 0; 222479bdfe76SSatish Balay 222579bdfe76SSatish Balay /* stuff for MatGetRow() */ 222679bdfe76SSatish Balay b->rowindices = 0; 222779bdfe76SSatish Balay b->rowvalues = 0; 222879bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 222979bdfe76SSatish Balay 2230a07cd24cSSatish Balay /* hash table stuff */ 2231a07cd24cSSatish Balay b->ht = 0; 2232187ce0cbSSatish Balay b->hd = 0; 22330bdbc534SSatish Balay b->ht_size = 0; 22347c922b88SBarry Smith b->ht_flag = PETSC_FALSE; 223525fdafccSSatish Balay b->ht_fact = 0; 2236187ce0cbSSatish Balay b->ht_total_ct = 0; 2237187ce0cbSSatish Balay b->ht_insert_ct = 0; 2238a07cd24cSSatish Balay 223979bdfe76SSatish Balay *A = B; 2240133cdb44SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2241133cdb44SSatish Balay if (flg) { 2242133cdb44SSatish Balay double fact = 1.39; 2243133cdb44SSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2244f1af5d2fSBarry Smith ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2245133cdb44SSatish Balay if (fact <= 1.0) fact = 1.39; 2246133cdb44SSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2247133cdb44SSatish Balay PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2248133cdb44SSatish Balay } 2249f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 22507fc3c18eSBarry Smith "MatStoreValues_MPIBAIJ", 22510c97f09dSSatish Balay MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2252f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 22537fc3c18eSBarry Smith "MatRetrieveValues_MPIBAIJ", 22540c97f09dSSatish Balay MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2255f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 22565ef9f2a5SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 22570c97f09dSSatish Balay MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 22583a40ed3dSBarry Smith PetscFunctionReturn(0); 225979bdfe76SSatish Balay } 2260026e39d0SSatish Balay 22615615d1e5SSatish Balay #undef __FUNC__ 2262b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 22632e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 22640ac07820SSatish Balay { 22650ac07820SSatish Balay Mat mat; 22660ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2267f1af5d2fSBarry Smith int ierr,len=0; 2268f1af5d2fSBarry Smith PetscTruth flg; 22690ac07820SSatish Balay 2270d64ed03dSBarry Smith PetscFunctionBegin; 22710ac07820SSatish Balay *newmat = 0; 22723f1db9ecSBarry Smith PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 22730ac07820SSatish Balay PLogObjectCreate(mat); 22740ac07820SSatish Balay mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2275549d3d68SSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 22760ac07820SSatish Balay mat->factor = matin->factor; 22770ac07820SSatish Balay mat->assembled = PETSC_TRUE; 2278aef5e8e0SSatish Balay mat->insertmode = NOT_SET_VALUES; 22790ac07820SSatish Balay 22800ac07820SSatish Balay a->m = mat->m = oldmat->m; 22810ac07820SSatish Balay a->n = mat->n = oldmat->n; 22820ac07820SSatish Balay a->M = mat->M = oldmat->M; 22830ac07820SSatish Balay a->N = mat->N = oldmat->N; 22840ac07820SSatish Balay 22850ac07820SSatish Balay a->bs = oldmat->bs; 22860ac07820SSatish Balay a->bs2 = oldmat->bs2; 22870ac07820SSatish Balay a->mbs = oldmat->mbs; 22880ac07820SSatish Balay a->nbs = oldmat->nbs; 22890ac07820SSatish Balay a->Mbs = oldmat->Mbs; 22900ac07820SSatish Balay a->Nbs = oldmat->Nbs; 22910ac07820SSatish Balay 22920ac07820SSatish Balay a->rstart = oldmat->rstart; 22930ac07820SSatish Balay a->rend = oldmat->rend; 22940ac07820SSatish Balay a->cstart = oldmat->cstart; 22950ac07820SSatish Balay a->cend = oldmat->cend; 22960ac07820SSatish Balay a->size = oldmat->size; 22970ac07820SSatish Balay a->rank = oldmat->rank; 2298aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2299aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2300aef5e8e0SSatish Balay a->rowindices = 0; 23010ac07820SSatish Balay a->rowvalues = 0; 23020ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 230330793edcSSatish Balay a->barray = 0; 23043123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 23053123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 23063123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 23073123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 23080ac07820SSatish Balay 2309133cdb44SSatish Balay /* hash table stuff */ 2310133cdb44SSatish Balay a->ht = 0; 2311133cdb44SSatish Balay a->hd = 0; 2312133cdb44SSatish Balay a->ht_size = 0; 2313133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 231425fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2315133cdb44SSatish Balay a->ht_total_ct = 0; 2316133cdb44SSatish Balay a->ht_insert_ct = 0; 2317133cdb44SSatish Balay 2318133cdb44SSatish Balay 2319ff2fd236SBarry Smith a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2320ff2fd236SBarry Smith PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 23210ac07820SSatish Balay a->cowners = a->rowners + a->size + 2; 2322ff2fd236SBarry Smith a->rowners_bs = a->cowners + a->size + 2; 2323549d3d68SSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 23248798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 23258798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 23260ac07820SSatish Balay if (oldmat->colmap) { 2327aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 23280f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 232948e59246SSatish Balay #else 23300ac07820SSatish Balay a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 23310ac07820SSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2332549d3d68SSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 233348e59246SSatish Balay #endif 23340ac07820SSatish Balay } else a->colmap = 0; 23350ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 23360ac07820SSatish Balay a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 23370ac07820SSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 2338549d3d68SSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 23390ac07820SSatish Balay } else a->garray = 0; 23400ac07820SSatish Balay 23410ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 23420ac07820SSatish Balay PLogObjectParent(mat,a->lvec); 23430ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2344e18c124aSSatish Balay 23450ac07820SSatish Balay PLogObjectParent(mat,a->Mvctx); 23462e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 23470ac07820SSatish Balay PLogObjectParent(mat,a->A); 23482e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 23490ac07820SSatish Balay PLogObjectParent(mat,a->B); 23500ac07820SSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2351e03a110bSBarry Smith ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 23520ac07820SSatish Balay if (flg) { 23530ac07820SSatish Balay ierr = MatPrintHelp(mat);CHKERRQ(ierr); 23540ac07820SSatish Balay } 23550ac07820SSatish Balay *newmat = mat; 23563a40ed3dSBarry Smith PetscFunctionReturn(0); 23570ac07820SSatish Balay } 235857b952d6SSatish Balay 2359e090d566SSatish Balay #include "petscsys.h" 236057b952d6SSatish Balay 23615615d1e5SSatish Balay #undef __FUNC__ 2362b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 236357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 236457b952d6SSatish Balay { 236557b952d6SSatish Balay Mat A; 236657b952d6SSatish Balay int i,nz,ierr,j,rstart,rend,fd; 236757b952d6SSatish Balay Scalar *vals,*buf; 236857b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 236957b952d6SSatish Balay MPI_Status status; 2370cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 237157b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2372f1af5d2fSBarry Smith int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 237357b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 237457b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 237557b952d6SSatish Balay 2376d64ed03dSBarry Smith PetscFunctionBegin; 2377f1af5d2fSBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 237857b952d6SSatish Balay 2379d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2380d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 238157b952d6SSatish Balay if (!rank) { 238257b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2383e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2384*29bbc08cSBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2385d64ed03dSBarry Smith if (header[3] < 0) { 2386*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2387d64ed03dSBarry Smith } 23886c5fab8fSBarry Smith } 2389d64ed03dSBarry Smith 2390ca161407SBarry Smith ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 239157b952d6SSatish Balay M = header[1]; N = header[2]; 239257b952d6SSatish Balay 2393*29bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 239457b952d6SSatish Balay 239557b952d6SSatish Balay /* 239657b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 239757b952d6SSatish Balay divisible by the blocksize 239857b952d6SSatish Balay */ 239957b952d6SSatish Balay Mbs = M/bs; 240057b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 240157b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 240257b952d6SSatish Balay else Mbs++; 240357b952d6SSatish Balay if (extra_rows &&!rank) { 2404b0267e0aSLois Curfman McInnes PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 240557b952d6SSatish Balay } 2406537820f0SBarry Smith 240757b952d6SSatish Balay /* determine ownership of all rows */ 240857b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 240957b952d6SSatish Balay m = mbs * bs; 2410cee3aa6bSSatish Balay rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2411cee3aa6bSSatish Balay browners = rowners + size + 1; 2412ca161407SBarry Smith ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 241357b952d6SSatish Balay rowners[0] = 0; 2414cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2415cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 241657b952d6SSatish Balay rstart = rowners[rank]; 241757b952d6SSatish Balay rend = rowners[rank+1]; 241857b952d6SSatish Balay 241957b952d6SSatish Balay /* distribute row lengths to all processors */ 242057b952d6SSatish Balay locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 242157b952d6SSatish Balay if (!rank) { 242257b952d6SSatish Balay rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2423e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 242457b952d6SSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 242557b952d6SSatish Balay sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2426cee3aa6bSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2427ca161407SBarry Smith ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2428606d414cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2429d64ed03dSBarry Smith } else { 2430ca161407SBarry Smith ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 243157b952d6SSatish Balay } 243257b952d6SSatish Balay 243357b952d6SSatish Balay if (!rank) { 243457b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 243557b952d6SSatish Balay procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2436549d3d68SSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 243757b952d6SSatish Balay for (i=0; i<size; i++) { 243857b952d6SSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 243957b952d6SSatish Balay procsnz[i] += rowlengths[j]; 244057b952d6SSatish Balay } 244157b952d6SSatish Balay } 2442606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 244357b952d6SSatish Balay 244457b952d6SSatish Balay /* determine max buffer needed and allocate it */ 244557b952d6SSatish Balay maxnz = 0; 244657b952d6SSatish Balay for (i=0; i<size; i++) { 244757b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 244857b952d6SSatish Balay } 244957b952d6SSatish Balay cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 245057b952d6SSatish Balay 245157b952d6SSatish Balay /* read in my part of the matrix column indices */ 245257b952d6SSatish Balay nz = procsnz[0]; 245357b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 245457b952d6SSatish Balay mycols = ibuf; 2455cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2456e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2457cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2458cee3aa6bSSatish Balay 245957b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 246057b952d6SSatish Balay for (i=1; i<size-1; i++) { 246157b952d6SSatish Balay nz = procsnz[i]; 2462e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2463ca161407SBarry Smith ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 246457b952d6SSatish Balay } 246557b952d6SSatish Balay /* read in the stuff for the last proc */ 246657b952d6SSatish Balay if (size != 1) { 246757b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2468e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 246957b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2470ca161407SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 247157b952d6SSatish Balay } 2472606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2473d64ed03dSBarry Smith } else { 247457b952d6SSatish Balay /* determine buffer space needed for message */ 247557b952d6SSatish Balay nz = 0; 247657b952d6SSatish Balay for (i=0; i<m; i++) { 247757b952d6SSatish Balay nz += locrowlens[i]; 247857b952d6SSatish Balay } 247957b952d6SSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 248057b952d6SSatish Balay mycols = ibuf; 248157b952d6SSatish Balay /* receive message of column indices*/ 2482ca161407SBarry Smith ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2483ca161407SBarry Smith ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2484*29bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 248557b952d6SSatish Balay } 248657b952d6SSatish Balay 248757b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2488cee3aa6bSSatish Balay dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2489cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 249057b952d6SSatish Balay mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2491549d3d68SSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 249257b952d6SSatish Balay masked1 = mask + Mbs; 249357b952d6SSatish Balay masked2 = masked1 + Mbs; 249457b952d6SSatish Balay rowcount = 0; nzcount = 0; 249557b952d6SSatish Balay for (i=0; i<mbs; i++) { 249657b952d6SSatish Balay dcount = 0; 249757b952d6SSatish Balay odcount = 0; 249857b952d6SSatish Balay for (j=0; j<bs; j++) { 249957b952d6SSatish Balay kmax = locrowlens[rowcount]; 250057b952d6SSatish Balay for (k=0; k<kmax; k++) { 250157b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 250257b952d6SSatish Balay if (!mask[tmp]) { 250357b952d6SSatish Balay mask[tmp] = 1; 250457b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 250557b952d6SSatish Balay else masked1[dcount++] = tmp; 250657b952d6SSatish Balay } 250757b952d6SSatish Balay } 250857b952d6SSatish Balay rowcount++; 250957b952d6SSatish Balay } 2510cee3aa6bSSatish Balay 251157b952d6SSatish Balay dlens[i] = dcount; 251257b952d6SSatish Balay odlens[i] = odcount; 2513cee3aa6bSSatish Balay 251457b952d6SSatish Balay /* zero out the mask elements we set */ 251557b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 251657b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 251757b952d6SSatish Balay } 2518cee3aa6bSSatish Balay 251957b952d6SSatish Balay /* create our matrix */ 2520549d3d68SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 252157b952d6SSatish Balay A = *newmat; 25226d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 252357b952d6SSatish Balay 252457b952d6SSatish Balay if (!rank) { 252557b952d6SSatish Balay buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 252657b952d6SSatish Balay /* read in my part of the matrix numerical values */ 252757b952d6SSatish Balay nz = procsnz[0]; 252857b952d6SSatish Balay vals = buf; 2529cee3aa6bSSatish Balay mycols = ibuf; 2530cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2531e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2532cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2533537820f0SBarry Smith 253457b952d6SSatish Balay /* insert into matrix */ 253557b952d6SSatish Balay jj = rstart*bs; 253657b952d6SSatish Balay for (i=0; i<m; i++) { 2537b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 253857b952d6SSatish Balay mycols += locrowlens[i]; 253957b952d6SSatish Balay vals += locrowlens[i]; 254057b952d6SSatish Balay jj++; 254157b952d6SSatish Balay } 254257b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 254357b952d6SSatish Balay for (i=1; i<size-1; i++) { 254457b952d6SSatish Balay nz = procsnz[i]; 254557b952d6SSatish Balay vals = buf; 2546e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2547ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 254857b952d6SSatish Balay } 254957b952d6SSatish Balay /* the last proc */ 255057b952d6SSatish Balay if (size != 1){ 255157b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2552cee3aa6bSSatish Balay vals = buf; 2553e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 255457b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2555ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 255657b952d6SSatish Balay } 2557606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2558d64ed03dSBarry Smith } else { 255957b952d6SSatish Balay /* receive numeric values */ 256057b952d6SSatish Balay buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 256157b952d6SSatish Balay 256257b952d6SSatish Balay /* receive message of values*/ 256357b952d6SSatish Balay vals = buf; 2564cee3aa6bSSatish Balay mycols = ibuf; 2565ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2566ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2567*29bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 256857b952d6SSatish Balay 256957b952d6SSatish Balay /* insert into matrix */ 257057b952d6SSatish Balay jj = rstart*bs; 2571cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2572b48991e4SBarry Smith ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 257357b952d6SSatish Balay mycols += locrowlens[i]; 257457b952d6SSatish Balay vals += locrowlens[i]; 257557b952d6SSatish Balay jj++; 257657b952d6SSatish Balay } 257757b952d6SSatish Balay } 2578606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2579606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2580606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2581606d414cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2582606d414cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2583606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 25846d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25856d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25863a40ed3dSBarry Smith PetscFunctionReturn(0); 258757b952d6SSatish Balay } 258857b952d6SSatish Balay 2589133cdb44SSatish Balay #undef __FUNC__ 2590b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2591133cdb44SSatish Balay /*@ 2592133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2593133cdb44SSatish Balay 2594133cdb44SSatish Balay Input Parameters: 2595133cdb44SSatish Balay . mat - the matrix 2596133cdb44SSatish Balay . fact - factor 2597133cdb44SSatish Balay 2598fee21e36SBarry Smith Collective on Mat 2599fee21e36SBarry Smith 26008c890885SBarry Smith Level: advanced 26018c890885SBarry Smith 2602133cdb44SSatish Balay Notes: 2603133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2604133cdb44SSatish Balay 2605133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2606133cdb44SSatish Balay 2607133cdb44SSatish Balay .seealso: MatSetOption() 2608133cdb44SSatish Balay @*/ 2609329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2610133cdb44SSatish Balay { 261125fdafccSSatish Balay Mat_MPIBAIJ *baij; 2612133cdb44SSatish Balay 2613133cdb44SSatish Balay PetscFunctionBegin; 2614133cdb44SSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 261525fdafccSSatish Balay if (mat->type != MATMPIBAIJ) { 2616*29bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only."); 2617133cdb44SSatish Balay } 2618133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2619133cdb44SSatish Balay baij->ht_fact = fact; 2620133cdb44SSatish Balay PetscFunctionReturn(0); 2621133cdb44SSatish Balay } 2622