1be1d678aSKris Buschelman #define PETSCMAT_DLL 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 479bdfe76SSatish Balay 5dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 1593fea6afSBarry Smith 1693fea6afSBarry Smith /* UGLY, ugly, ugly 1787828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 1893fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 1993fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2093fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2193fea6afSBarry Smith into the single precision data structures. 2293fea6afSBarry Smith */ 2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 24b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 2993fea6afSBarry Smith #else 306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3593fea6afSBarry Smith #endif 363b2fbd54SBarry Smith 374a2ae208SSatish Balay #undef __FUNCT__ 384a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 407843d17aSBarry Smith { 417843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42dfbe8321SBarry Smith PetscErrorCode ierr; 43b24ad042SBarry Smith PetscInt i; 4487828ca2SBarry Smith PetscScalar *va,*vb; 457843d17aSBarry Smith Vec vtmp; 467843d17aSBarry Smith 477843d17aSBarry Smith PetscFunctionBegin; 487843d17aSBarry Smith 497843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 501ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 517843d17aSBarry Smith 52899cda47SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 537843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 541ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 557843d17aSBarry Smith 56899cda47SBarry Smith for (i=0; i<A->rmap.n; i++){ 57273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 587843d17aSBarry Smith } 597843d17aSBarry Smith 601ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 611ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 637843d17aSBarry Smith 647843d17aSBarry Smith PetscFunctionReturn(0); 657843d17aSBarry Smith } 667843d17aSBarry Smith 677fc3c18eSBarry Smith EXTERN_C_BEGIN 684a2ae208SSatish Balay #undef __FUNCT__ 694a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 717fc3c18eSBarry Smith { 727fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73dfbe8321SBarry Smith PetscErrorCode ierr; 747fc3c18eSBarry Smith 757fc3c18eSBarry Smith PetscFunctionBegin; 767fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 787fc3c18eSBarry Smith PetscFunctionReturn(0); 797fc3c18eSBarry Smith } 807fc3c18eSBarry Smith EXTERN_C_END 817fc3c18eSBarry Smith 827fc3c18eSBarry Smith EXTERN_C_BEGIN 834a2ae208SSatish Balay #undef __FUNCT__ 844a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 867fc3c18eSBarry Smith { 877fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88dfbe8321SBarry Smith PetscErrorCode ierr; 897fc3c18eSBarry Smith 907fc3c18eSBarry Smith PetscFunctionBegin; 917fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 937fc3c18eSBarry Smith PetscFunctionReturn(0); 947fc3c18eSBarry Smith } 957fc3c18eSBarry Smith EXTERN_C_END 967fc3c18eSBarry Smith 97537820f0SBarry Smith /* 98537820f0SBarry Smith Local utility routine that creates a mapping from the global column 9957b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10057b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10157b952d6SSatish Balay length of colmap equals the global matrix length. 10257b952d6SSatish Balay */ 1034a2ae208SSatish Balay #undef __FUNCT__ 1044a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 10657b952d6SSatish Balay { 10757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10857b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 1096849ba73SBarry Smith PetscErrorCode ierr; 110899cda47SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 11157b952d6SSatish Balay 112d64ed03dSBarry Smith PetscFunctionBegin; 113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 114f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11548e59246SSatish Balay for (i=0; i<nbs; i++){ 1160f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11748e59246SSatish Balay } 11848e59246SSatish Balay #else 119b24ad042SBarry Smith ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 12052e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 121b24ad042SBarry Smith ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12348e59246SSatish Balay #endif 1243a40ed3dSBarry Smith PetscFunctionReturn(0); 12557b952d6SSatish Balay } 12657b952d6SSatish Balay 12780c1aa95SSatish Balay #define CHUNKSIZE 10 12880c1aa95SSatish Balay 129f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13080c1aa95SSatish Balay { \ 13180c1aa95SSatish Balay \ 13280c1aa95SSatish Balay brow = row/bs; \ 13380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13580c1aa95SSatish Balay bcol = col/bs; \ 13680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 137ab26458aSBarry Smith low = 0; high = nrow; \ 138ab26458aSBarry Smith while (high-low > 3) { \ 139ab26458aSBarry Smith t = (low+high)/2; \ 140ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 141ab26458aSBarry Smith else low = t; \ 142ab26458aSBarry Smith } \ 143ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 147eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 148eada6651SSatish Balay else *bap = value; \ 149ac7a638eSSatish Balay goto a_noinsert; \ 15080c1aa95SSatish Balay } \ 15180c1aa95SSatish Balay } \ 15289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 153085a36d4SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 15580c1aa95SSatish Balay N = nrow++ - 1; \ 15680c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 15780c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 15880c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1593eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16080c1aa95SSatish Balay } \ 1613eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 16280c1aa95SSatish Balay rp[_i] = bcol; \ 16380c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 164ac7a638eSSatish Balay a_noinsert:; \ 16580c1aa95SSatish Balay ailen[brow] = nrow; \ 16680c1aa95SSatish Balay } 16757b952d6SSatish Balay 168ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 169ac7a638eSSatish Balay { \ 170ac7a638eSSatish Balay brow = row/bs; \ 171ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 172ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 173ac7a638eSSatish Balay bcol = col/bs; \ 174ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 175ac7a638eSSatish Balay low = 0; high = nrow; \ 176ac7a638eSSatish Balay while (high-low > 3) { \ 177ac7a638eSSatish Balay t = (low+high)/2; \ 178ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 179ac7a638eSSatish Balay else low = t; \ 180ac7a638eSSatish Balay } \ 181ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 182ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 183ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 184ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 185ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 186ac7a638eSSatish Balay else *bap = value; \ 187ac7a638eSSatish Balay goto b_noinsert; \ 188ac7a638eSSatish Balay } \ 189ac7a638eSSatish Balay } \ 19089280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 191085a36d4SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 192421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 193085a36d4SBarry Smith CHKMEMQ;\ 194ac7a638eSSatish Balay N = nrow++ - 1; \ 195ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 196ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 197ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 1983eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199ac7a638eSSatish Balay } \ 2003eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 201ac7a638eSSatish Balay rp[_i] = bcol; \ 202ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 203ac7a638eSSatish Balay b_noinsert:; \ 204ac7a638eSSatish Balay bilen[brow] = nrow; \ 205ac7a638eSSatish Balay } 206ac7a638eSSatish Balay 20793fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2084a2ae208SSatish Balay #undef __FUNCT__ 2094a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 210b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 21157b952d6SSatish Balay { 2126fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 213dfbe8321SBarry Smith PetscErrorCode ierr; 214b24ad042SBarry Smith PetscInt i,N = m*n; 2156fa18ffdSBarry Smith MatScalar *vsingle; 21693fea6afSBarry Smith 21793fea6afSBarry Smith PetscFunctionBegin; 2186fa18ffdSBarry Smith if (N > b->setvalueslen) { 21905b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 22082502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2216fa18ffdSBarry Smith b->setvalueslen = N; 2226fa18ffdSBarry Smith } 2236fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2246fa18ffdSBarry Smith 22593fea6afSBarry Smith for (i=0; i<N; i++) { 22693fea6afSBarry Smith vsingle[i] = v[i]; 22793fea6afSBarry Smith } 22893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 22993fea6afSBarry Smith PetscFunctionReturn(0); 23093fea6afSBarry Smith } 23193fea6afSBarry Smith 2324a2ae208SSatish Balay #undef __FUNCT__ 2334a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 234b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 23593fea6afSBarry Smith { 2366fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 237dfbe8321SBarry Smith PetscErrorCode ierr; 238b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2396fa18ffdSBarry Smith MatScalar *vsingle; 24093fea6afSBarry Smith 24193fea6afSBarry Smith PetscFunctionBegin; 2426fa18ffdSBarry Smith if (N > b->setvalueslen) { 24305b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 24482502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2456fa18ffdSBarry Smith b->setvalueslen = N; 2466fa18ffdSBarry Smith } 2476fa18ffdSBarry Smith vsingle = b->setvaluescopy; 24893fea6afSBarry Smith for (i=0; i<N; i++) { 24993fea6afSBarry Smith vsingle[i] = v[i]; 25093fea6afSBarry Smith } 25193fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 25293fea6afSBarry Smith PetscFunctionReturn(0); 25393fea6afSBarry Smith } 2546fa18ffdSBarry Smith 2554a2ae208SSatish Balay #undef __FUNCT__ 2564a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 257b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2586fa18ffdSBarry Smith { 2596fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 260dfbe8321SBarry Smith PetscErrorCode ierr; 261b24ad042SBarry Smith PetscInt i,N = m*n; 2626fa18ffdSBarry Smith MatScalar *vsingle; 2636fa18ffdSBarry Smith 2646fa18ffdSBarry Smith PetscFunctionBegin; 2656fa18ffdSBarry Smith if (N > b->setvalueslen) { 26605b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 26782502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2686fa18ffdSBarry Smith b->setvalueslen = N; 2696fa18ffdSBarry Smith } 2706fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2716fa18ffdSBarry Smith for (i=0; i<N; i++) { 2726fa18ffdSBarry Smith vsingle[i] = v[i]; 2736fa18ffdSBarry Smith } 2746fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 2756fa18ffdSBarry Smith PetscFunctionReturn(0); 2766fa18ffdSBarry Smith } 2776fa18ffdSBarry Smith 2784a2ae208SSatish Balay #undef __FUNCT__ 2794a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 280b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2816fa18ffdSBarry Smith { 2826fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 283dfbe8321SBarry Smith PetscErrorCode ierr; 284b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2856fa18ffdSBarry Smith MatScalar *vsingle; 2866fa18ffdSBarry Smith 2876fa18ffdSBarry Smith PetscFunctionBegin; 2886fa18ffdSBarry Smith if (N > b->setvalueslen) { 28905b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 29082502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2916fa18ffdSBarry Smith b->setvalueslen = N; 2926fa18ffdSBarry Smith } 2936fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2946fa18ffdSBarry Smith for (i=0; i<N; i++) { 2956fa18ffdSBarry Smith vsingle[i] = v[i]; 2966fa18ffdSBarry Smith } 2976fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 2986fa18ffdSBarry Smith PetscFunctionReturn(0); 2996fa18ffdSBarry Smith } 30093fea6afSBarry Smith #endif 30193fea6afSBarry Smith 3024a2ae208SSatish Balay #undef __FUNCT__ 303e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 304b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 30593fea6afSBarry Smith { 30657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 30793fea6afSBarry Smith MatScalar value; 308273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 309dfbe8321SBarry Smith PetscErrorCode ierr; 310b24ad042SBarry Smith PetscInt i,j,row,col; 311899cda47SBarry Smith PetscInt rstart_orig=mat->rmap.rstart; 312899cda47SBarry Smith PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 313899cda47SBarry Smith PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 31457b952d6SSatish Balay 315eada6651SSatish Balay /* Some Variables required in the macro */ 31680c1aa95SSatish Balay Mat A = baij->A; 31780c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 318b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3193eda8832SBarry Smith MatScalar *aa=a->a; 320ac7a638eSSatish Balay 321ac7a638eSSatish Balay Mat B = baij->B; 322ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 323b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3243eda8832SBarry Smith MatScalar *ba=b->a; 325ac7a638eSSatish Balay 326b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 327b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 3283eda8832SBarry Smith MatScalar *ap,*bap; 32980c1aa95SSatish Balay 330d64ed03dSBarry Smith PetscFunctionBegin; 33157b952d6SSatish Balay for (i=0; i<m; i++) { 3325ef9f2a5SBarry Smith if (im[i] < 0) continue; 3332515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 334899cda47SBarry Smith if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 335639f9d9dSBarry Smith #endif 33657b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 33757b952d6SSatish Balay row = im[i] - rstart_orig; 33857b952d6SSatish Balay for (j=0; j<n; j++) { 33957b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 34057b952d6SSatish Balay col = in[j] - cstart_orig; 34157b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 342f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 34380c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 34473959e64SBarry Smith } else if (in[j] < 0) continue; 3452515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 346899cda47SBarry Smith else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 347639f9d9dSBarry Smith #endif 34857b952d6SSatish Balay else { 34957b952d6SSatish Balay if (mat->was_assembled) { 350905e6a2fSBarry Smith if (!baij->colmap) { 351905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 352905e6a2fSBarry Smith } 353aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3540f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 355bba1ac68SSatish Balay col = col - 1; 35648e59246SSatish Balay #else 357bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 35848e59246SSatish Balay #endif 35957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 36057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3618295de27SSatish Balay col = in[j]; 3629bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 3639bf004c3SSatish Balay B = baij->B; 3649bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 3659bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 3669bf004c3SSatish Balay ba=b->a; 367bba1ac68SSatish Balay } else col += in[j]%bs; 3688295de27SSatish Balay } else col = in[j]; 36957b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 37090da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 37190da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 37257b952d6SSatish Balay } 37357b952d6SSatish Balay } 374d64ed03dSBarry Smith } else { 37590f02eecSBarry Smith if (!baij->donotstash) { 376ff2fd236SBarry Smith if (roworiented) { 3776fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 378ff2fd236SBarry Smith } else { 3796fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 38057b952d6SSatish Balay } 38157b952d6SSatish Balay } 38257b952d6SSatish Balay } 38390f02eecSBarry Smith } 3843a40ed3dSBarry Smith PetscFunctionReturn(0); 38557b952d6SSatish Balay } 38657b952d6SSatish Balay 3874a2ae208SSatish Balay #undef __FUNCT__ 3884a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 389b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 390ab26458aSBarry Smith { 391ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 392f15d580aSBarry Smith const MatScalar *value; 393f15d580aSBarry Smith MatScalar *barray=baij->barray; 394273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 395dfbe8321SBarry Smith PetscErrorCode ierr; 396899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 397899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 398899cda47SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 399ab26458aSBarry Smith 400b16ae2b1SBarry Smith PetscFunctionBegin; 40130793edcSSatish Balay if(!barray) { 40282502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 40382502324SSatish Balay baij->barray = barray; 40430793edcSSatish Balay } 40530793edcSSatish Balay 406ab26458aSBarry Smith if (roworiented) { 407ab26458aSBarry Smith stepval = (n-1)*bs; 408ab26458aSBarry Smith } else { 409ab26458aSBarry Smith stepval = (m-1)*bs; 410ab26458aSBarry Smith } 411ab26458aSBarry Smith for (i=0; i<m; i++) { 4125ef9f2a5SBarry Smith if (im[i] < 0) continue; 4132515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 41477431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 415ab26458aSBarry Smith #endif 416ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 417ab26458aSBarry Smith row = im[i] - rstart; 418ab26458aSBarry Smith for (j=0; j<n; j++) { 41915b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 42015b57d14SSatish Balay if ((roworiented) && (n == 1)) { 421f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 42215b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 423f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 42415b57d14SSatish Balay } else { /* Here a copy is required */ 425ab26458aSBarry Smith if (roworiented) { 426ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 427ab26458aSBarry Smith } else { 428ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 429abef11f7SSatish Balay } 43047513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 43147513183SBarry Smith for (jj=0; jj<bs; jj++) { 43230793edcSSatish Balay *barray++ = *value++; 43347513183SBarry Smith } 43447513183SBarry Smith } 43530793edcSSatish Balay barray -=bs2; 43615b57d14SSatish Balay } 437abef11f7SSatish Balay 438abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 439abef11f7SSatish Balay col = in[j] - cstart; 44093fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 441ab26458aSBarry Smith } 4425ef9f2a5SBarry Smith else if (in[j] < 0) continue; 4432515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 44477431f27SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 445ab26458aSBarry Smith #endif 446ab26458aSBarry Smith else { 447ab26458aSBarry Smith if (mat->was_assembled) { 448ab26458aSBarry Smith if (!baij->colmap) { 449ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 450ab26458aSBarry Smith } 451a5eb4965SSatish Balay 4522515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 453aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 454b24ad042SBarry Smith { PetscInt data; 4550f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 45629bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 457fa46199cSSatish Balay } 45848e59246SSatish Balay #else 45929bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 460a5eb4965SSatish Balay #endif 46148e59246SSatish Balay #endif 462aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4630f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 464fa46199cSSatish Balay col = (col - 1)/bs; 46548e59246SSatish Balay #else 466a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 46748e59246SSatish Balay #endif 468ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 469ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 470ab26458aSBarry Smith col = in[j]; 471ab26458aSBarry Smith } 472ab26458aSBarry Smith } 473ab26458aSBarry Smith else col = in[j]; 47493fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 475ab26458aSBarry Smith } 476ab26458aSBarry Smith } 477d64ed03dSBarry Smith } else { 478ab26458aSBarry Smith if (!baij->donotstash) { 479ff2fd236SBarry Smith if (roworiented) { 4806fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 481ff2fd236SBarry Smith } else { 4826fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 483ff2fd236SBarry Smith } 484abef11f7SSatish Balay } 485ab26458aSBarry Smith } 486ab26458aSBarry Smith } 4873a40ed3dSBarry Smith PetscFunctionReturn(0); 488ab26458aSBarry Smith } 4896fa18ffdSBarry Smith 4900bdbc534SSatish Balay #define HASH_KEY 0.6180339887 491b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 492b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 493b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 4944a2ae208SSatish Balay #undef __FUNCT__ 4954a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 496b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 4970bdbc534SSatish Balay { 4980bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 499273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 500dfbe8321SBarry Smith PetscErrorCode ierr; 501b24ad042SBarry Smith PetscInt i,j,row,col; 502899cda47SBarry Smith PetscInt rstart_orig=mat->rmap.rstart; 503899cda47SBarry Smith PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 504899cda47SBarry Smith PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 505329f5518SBarry Smith PetscReal tmp; 5063eda8832SBarry Smith MatScalar **HD = baij->hd,value; 5072515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 508b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5094a15367fSSatish Balay #endif 5100bdbc534SSatish Balay 5110bdbc534SSatish Balay PetscFunctionBegin; 5120bdbc534SSatish Balay 5130bdbc534SSatish Balay for (i=0; i<m; i++) { 5142515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 51529bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 516899cda47SBarry Smith if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 5170bdbc534SSatish Balay #endif 5180bdbc534SSatish Balay row = im[i]; 519c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5200bdbc534SSatish Balay for (j=0; j<n; j++) { 5210bdbc534SSatish Balay col = in[j]; 5226fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 523b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 524c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 525c2760754SSatish Balay h1 = HASH(size,key,tmp); 5260bdbc534SSatish Balay 527c2760754SSatish Balay 528c2760754SSatish Balay idx = h1; 5292515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 530187ce0cbSSatish Balay insert_ct++; 531187ce0cbSSatish Balay total_ct++; 532187ce0cbSSatish Balay if (HT[idx] != key) { 533187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 534187ce0cbSSatish Balay if (idx == size) { 535187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 536187ce0cbSSatish Balay if (idx == h1) { 53777431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 538187ce0cbSSatish Balay } 539187ce0cbSSatish Balay } 540187ce0cbSSatish Balay } 541187ce0cbSSatish Balay #else 542c2760754SSatish Balay if (HT[idx] != key) { 543c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 544c2760754SSatish Balay if (idx == size) { 545c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 546c2760754SSatish Balay if (idx == h1) { 54777431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 548c2760754SSatish Balay } 549c2760754SSatish Balay } 550c2760754SSatish Balay } 551187ce0cbSSatish Balay #endif 552c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 553c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 554c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5550bdbc534SSatish Balay } 5560bdbc534SSatish Balay } else { 5570bdbc534SSatish Balay if (!baij->donotstash) { 558ff2fd236SBarry Smith if (roworiented) { 5598798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 560ff2fd236SBarry Smith } else { 5618798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5620bdbc534SSatish Balay } 5630bdbc534SSatish Balay } 5640bdbc534SSatish Balay } 5650bdbc534SSatish Balay } 5662515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 567187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 568187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 569187ce0cbSSatish Balay #endif 5700bdbc534SSatish Balay PetscFunctionReturn(0); 5710bdbc534SSatish Balay } 5720bdbc534SSatish Balay 5734a2ae208SSatish Balay #undef __FUNCT__ 5744a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 575b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 5760bdbc534SSatish Balay { 5770bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 578273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 579dfbe8321SBarry Smith PetscErrorCode ierr; 580b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col; 581899cda47SBarry Smith PetscInt rstart=baij->rstartbs; 582899cda47SBarry Smith PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 583b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 584329f5518SBarry Smith PetscReal tmp; 5853eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 586f15d580aSBarry Smith const MatScalar *v_t,*value; 5872515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 588b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5894a15367fSSatish Balay #endif 5900bdbc534SSatish Balay 591d0a41580SSatish Balay PetscFunctionBegin; 592d0a41580SSatish Balay 5930bdbc534SSatish Balay if (roworiented) { 5940bdbc534SSatish Balay stepval = (n-1)*bs; 5950bdbc534SSatish Balay } else { 5960bdbc534SSatish Balay stepval = (m-1)*bs; 5970bdbc534SSatish Balay } 5980bdbc534SSatish Balay for (i=0; i<m; i++) { 5992515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 60077431f27SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 60177431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 6020bdbc534SSatish Balay #endif 6030bdbc534SSatish Balay row = im[i]; 604187ce0cbSSatish Balay v_t = v + i*bs2; 605c2760754SSatish Balay if (row >= rstart && row < rend) { 6060bdbc534SSatish Balay for (j=0; j<n; j++) { 6070bdbc534SSatish Balay col = in[j]; 6080bdbc534SSatish Balay 6090bdbc534SSatish Balay /* Look up into the Hash Table */ 610c2760754SSatish Balay key = row*Nbs+col+1; 611c2760754SSatish Balay h1 = HASH(size,key,tmp); 6120bdbc534SSatish Balay 613c2760754SSatish Balay idx = h1; 6142515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 615187ce0cbSSatish Balay total_ct++; 616187ce0cbSSatish Balay insert_ct++; 617187ce0cbSSatish Balay if (HT[idx] != key) { 618187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 619187ce0cbSSatish Balay if (idx == size) { 620187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 621187ce0cbSSatish Balay if (idx == h1) { 62277431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623187ce0cbSSatish Balay } 624187ce0cbSSatish Balay } 625187ce0cbSSatish Balay } 626187ce0cbSSatish Balay #else 627c2760754SSatish Balay if (HT[idx] != key) { 628c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 629c2760754SSatish Balay if (idx == size) { 630c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 631c2760754SSatish Balay if (idx == h1) { 63277431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 633c2760754SSatish Balay } 634c2760754SSatish Balay } 635c2760754SSatish Balay } 636187ce0cbSSatish Balay #endif 637c2760754SSatish Balay baij_a = HD[idx]; 6380bdbc534SSatish Balay if (roworiented) { 639c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 640187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 641187ce0cbSSatish Balay value = v_t; 642187ce0cbSSatish Balay v_t += bs; 643fef45726SSatish Balay if (addv == ADD_VALUES) { 644c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 645c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 646fef45726SSatish Balay baij_a[jj] += *value++; 647b4cc0f5aSSatish Balay } 648b4cc0f5aSSatish Balay } 649fef45726SSatish Balay } else { 650c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 651c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 652fef45726SSatish Balay baij_a[jj] = *value++; 653fef45726SSatish Balay } 654fef45726SSatish Balay } 655fef45726SSatish Balay } 6560bdbc534SSatish Balay } else { 6570bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 658fef45726SSatish Balay if (addv == ADD_VALUES) { 659b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6600bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 661fef45726SSatish Balay baij_a[jj] += *value++; 662fef45726SSatish Balay } 663fef45726SSatish Balay } 664fef45726SSatish Balay } else { 665fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 666fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 667fef45726SSatish Balay baij_a[jj] = *value++; 668fef45726SSatish Balay } 669b4cc0f5aSSatish Balay } 6700bdbc534SSatish Balay } 6710bdbc534SSatish Balay } 6720bdbc534SSatish Balay } 6730bdbc534SSatish Balay } else { 6740bdbc534SSatish Balay if (!baij->donotstash) { 6750bdbc534SSatish Balay if (roworiented) { 6768798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6770bdbc534SSatish Balay } else { 6788798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6790bdbc534SSatish Balay } 6800bdbc534SSatish Balay } 6810bdbc534SSatish Balay } 6820bdbc534SSatish Balay } 6832515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 684187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 685187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 686187ce0cbSSatish Balay #endif 6870bdbc534SSatish Balay PetscFunctionReturn(0); 6880bdbc534SSatish Balay } 689133cdb44SSatish Balay 6904a2ae208SSatish Balay #undef __FUNCT__ 6914a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 692b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 693d6de1c52SSatish Balay { 694d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6956849ba73SBarry Smith PetscErrorCode ierr; 696899cda47SBarry Smith PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 697899cda47SBarry Smith PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 698d6de1c52SSatish Balay 699133cdb44SSatish Balay PetscFunctionBegin; 700d6de1c52SSatish Balay for (i=0; i<m; i++) { 70177431f27SBarry Smith if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 702899cda47SBarry Smith if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 703d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 704d6de1c52SSatish Balay row = idxm[i] - bsrstart; 705d6de1c52SSatish Balay for (j=0; j<n; j++) { 70677431f27SBarry Smith if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 707899cda47SBarry Smith if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 708d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 709d6de1c52SSatish Balay col = idxn[j] - bscstart; 71098dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 711d64ed03dSBarry Smith } else { 712905e6a2fSBarry Smith if (!baij->colmap) { 713905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 714905e6a2fSBarry Smith } 715aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7160f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 717fa46199cSSatish Balay data --; 71848e59246SSatish Balay #else 71948e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 72048e59246SSatish Balay #endif 72148e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 722d9d09a02SSatish Balay else { 72348e59246SSatish Balay col = data + idxn[j]%bs; 72498dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 725d6de1c52SSatish Balay } 726d6de1c52SSatish Balay } 727d6de1c52SSatish Balay } 728d64ed03dSBarry Smith } else { 72929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 730d6de1c52SSatish Balay } 731d6de1c52SSatish Balay } 7323a40ed3dSBarry Smith PetscFunctionReturn(0); 733d6de1c52SSatish Balay } 734d6de1c52SSatish Balay 7354a2ae208SSatish Balay #undef __FUNCT__ 7364a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 737dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 738d6de1c52SSatish Balay { 739d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 740d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 741dfbe8321SBarry Smith PetscErrorCode ierr; 742899cda47SBarry Smith PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 743329f5518SBarry Smith PetscReal sum = 0.0; 7443eda8832SBarry Smith MatScalar *v; 745d6de1c52SSatish Balay 746d64ed03dSBarry Smith PetscFunctionBegin; 747d6de1c52SSatish Balay if (baij->size == 1) { 748064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 749d6de1c52SSatish Balay } else { 750d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 751d6de1c52SSatish Balay v = amat->a; 7528a62d963SHong Zhang nz = amat->nz*bs2; 7538a62d963SHong Zhang for (i=0; i<nz; i++) { 754aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 755329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 756d6de1c52SSatish Balay #else 757d6de1c52SSatish Balay sum += (*v)*(*v); v++; 758d6de1c52SSatish Balay #endif 759d6de1c52SSatish Balay } 760d6de1c52SSatish Balay v = bmat->a; 7618a62d963SHong Zhang nz = bmat->nz*bs2; 7628a62d963SHong Zhang for (i=0; i<nz; i++) { 763aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 764329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 765d6de1c52SSatish Balay #else 766d6de1c52SSatish Balay sum += (*v)*(*v); v++; 767d6de1c52SSatish Balay #endif 768d6de1c52SSatish Balay } 769064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 770064f8208SBarry Smith *nrm = sqrt(*nrm); 7718a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 7728a62d963SHong Zhang PetscReal *tmp,*tmp2; 773899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 774899cda47SBarry Smith ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 775899cda47SBarry Smith tmp2 = tmp + mat->cmap.N; 776899cda47SBarry Smith ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 7778a62d963SHong Zhang v = amat->a; jj = amat->j; 7788a62d963SHong Zhang for (i=0; i<amat->nz; i++) { 7798a62d963SHong Zhang for (j=0; j<bs; j++){ 7808a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 7818a62d963SHong Zhang for (row=0; row<bs; row++){ 7828a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7838a62d963SHong Zhang } 7848a62d963SHong Zhang } 7858a62d963SHong Zhang jj++; 7868a62d963SHong Zhang } 7878a62d963SHong Zhang v = bmat->a; jj = bmat->j; 7888a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 7898a62d963SHong Zhang for (j=0; j<bs; j++){ 7908a62d963SHong Zhang col = bs*garray[*jj] + j; 7918a62d963SHong Zhang for (row=0; row<bs; row++){ 7928a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7938a62d963SHong Zhang } 7948a62d963SHong Zhang } 7958a62d963SHong Zhang jj++; 7968a62d963SHong Zhang } 797899cda47SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 7988a62d963SHong Zhang *nrm = 0.0; 799899cda47SBarry Smith for (j=0; j<mat->cmap.N; j++) { 8008a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8018a62d963SHong Zhang } 8028a62d963SHong Zhang ierr = PetscFree(tmp);CHKERRQ(ierr); 8038a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 804577dd1f9SKris Buschelman PetscReal *sums; 805577dd1f9SKris Buschelman ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 8068a62d963SHong Zhang sum = 0.0; 8078a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 8088a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 8098a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 8108a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 8118a62d963SHong Zhang for (i=0; i<nz; i++) { 8128a62d963SHong Zhang for (col=0; col<bs; col++){ 8138a62d963SHong Zhang for (row=0; row<bs; row++){ 8148a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8158a62d963SHong Zhang } 8168a62d963SHong Zhang } 8178a62d963SHong Zhang } 8188a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 8198a62d963SHong Zhang nz = bmat->i[j+1]-bmat->i[j]; 8208a62d963SHong Zhang for (i=0; i<nz; i++) { 8218a62d963SHong Zhang for (col=0; col<bs; col++){ 8228a62d963SHong Zhang for (row=0; row<bs; row++){ 8238a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8248a62d963SHong Zhang } 8258a62d963SHong Zhang } 8268a62d963SHong Zhang } 8278a62d963SHong Zhang for (row=0; row<bs; row++){ 8288a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 8298a62d963SHong Zhang } 8308a62d963SHong Zhang } 8318a62d963SHong Zhang ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 832577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 833d64ed03dSBarry Smith } else { 83429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 835d6de1c52SSatish Balay } 836d64ed03dSBarry Smith } 8373a40ed3dSBarry Smith PetscFunctionReturn(0); 838d6de1c52SSatish Balay } 83957b952d6SSatish Balay 840fef45726SSatish Balay /* 841fef45726SSatish Balay Creates the hash table, and sets the table 842fef45726SSatish Balay This table is created only once. 843fef45726SSatish Balay If new entried need to be added to the matrix 844fef45726SSatish Balay then the hash table has to be destroyed and 845fef45726SSatish Balay recreated. 846fef45726SSatish Balay */ 8474a2ae208SSatish Balay #undef __FUNCT__ 8484a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 849dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 850596b8d2eSBarry Smith { 851596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 852596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 853596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 854b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8556849ba73SBarry Smith PetscErrorCode ierr; 856899cda47SBarry Smith PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 857899cda47SBarry Smith PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858b24ad042SBarry Smith PetscInt *HT,key; 8593eda8832SBarry Smith MatScalar **HD; 860329f5518SBarry Smith PetscReal tmp; 8616cf91177SBarry Smith #if defined(PETSC_USE_INFO) 862b24ad042SBarry Smith PetscInt ct=0,max=0; 8634a15367fSSatish Balay #endif 864fef45726SSatish Balay 865d64ed03dSBarry Smith PetscFunctionBegin; 866b24ad042SBarry Smith baij->ht_size=(PetscInt)(factor*nz); 8670bdbc534SSatish Balay size = baij->ht_size; 868fef45726SSatish Balay 8690bdbc534SSatish Balay if (baij->ht) { 8700bdbc534SSatish Balay PetscFunctionReturn(0); 871596b8d2eSBarry Smith } 8720bdbc534SSatish Balay 873fef45726SSatish Balay /* Allocate Memory for Hash Table */ 874b24ad042SBarry Smith ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875b24ad042SBarry Smith baij->ht = (PetscInt*)(baij->hd + size); 876b9e4cc15SSatish Balay HD = baij->hd; 877a07cd24cSSatish Balay HT = baij->ht; 878b9e4cc15SSatish Balay 879b9e4cc15SSatish Balay 880b24ad042SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8810bdbc534SSatish Balay 882596b8d2eSBarry Smith 883596b8d2eSBarry Smith /* Loop Over A */ 8840bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 885596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8860bdbc534SSatish Balay row = i+rstart; 8870bdbc534SSatish Balay col = aj[j]+cstart; 888596b8d2eSBarry Smith 889187ce0cbSSatish Balay key = row*Nbs + col + 1; 890c2760754SSatish Balay h1 = HASH(size,key,tmp); 8910bdbc534SSatish Balay for (k=0; k<size; k++){ 892958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 8930bdbc534SSatish Balay HT[(h1+k)%size] = key; 8940bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 895596b8d2eSBarry Smith break; 8966cf91177SBarry Smith #if defined(PETSC_USE_INFO) 897187ce0cbSSatish Balay } else { 898187ce0cbSSatish Balay ct++; 899187ce0cbSSatish Balay #endif 900596b8d2eSBarry Smith } 901187ce0cbSSatish Balay } 9026cf91177SBarry Smith #if defined(PETSC_USE_INFO) 903187ce0cbSSatish Balay if (k> max) max = k; 904187ce0cbSSatish Balay #endif 905596b8d2eSBarry Smith } 906596b8d2eSBarry Smith } 907596b8d2eSBarry Smith /* Loop Over B */ 9080bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 909596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9100bdbc534SSatish Balay row = i+rstart; 9110bdbc534SSatish Balay col = garray[bj[j]]; 912187ce0cbSSatish Balay key = row*Nbs + col + 1; 913c2760754SSatish Balay h1 = HASH(size,key,tmp); 9140bdbc534SSatish Balay for (k=0; k<size; k++){ 915958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9160bdbc534SSatish Balay HT[(h1+k)%size] = key; 9170bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 918596b8d2eSBarry Smith break; 9196cf91177SBarry Smith #if defined(PETSC_USE_INFO) 920187ce0cbSSatish Balay } else { 921187ce0cbSSatish Balay ct++; 922187ce0cbSSatish Balay #endif 923596b8d2eSBarry Smith } 924187ce0cbSSatish Balay } 9256cf91177SBarry Smith #if defined(PETSC_USE_INFO) 926187ce0cbSSatish Balay if (k> max) max = k; 927187ce0cbSSatish Balay #endif 928596b8d2eSBarry Smith } 929596b8d2eSBarry Smith } 930596b8d2eSBarry Smith 931596b8d2eSBarry Smith /* Print Summary */ 9326cf91177SBarry Smith #if defined(PETSC_USE_INFO) 933c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 934596b8d2eSBarry Smith if (HT[i]) {j++;} 935c38d4ed2SBarry Smith } 936ae15b995SBarry Smith ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 937187ce0cbSSatish Balay #endif 9383a40ed3dSBarry Smith PetscFunctionReturn(0); 939596b8d2eSBarry Smith } 94057b952d6SSatish Balay 9414a2ae208SSatish Balay #undef __FUNCT__ 9424a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 943dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944bbb85fb3SSatish Balay { 945bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946dfbe8321SBarry Smith PetscErrorCode ierr; 947b24ad042SBarry Smith PetscInt nstash,reallocs; 948bbb85fb3SSatish Balay InsertMode addv; 949bbb85fb3SSatish Balay 950bbb85fb3SSatish Balay PetscFunctionBegin; 951bbb85fb3SSatish Balay if (baij->donotstash) { 952bbb85fb3SSatish Balay PetscFunctionReturn(0); 953bbb85fb3SSatish Balay } 954bbb85fb3SSatish Balay 955bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 956bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 957bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 95829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 959bbb85fb3SSatish Balay } 960bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 961bbb85fb3SSatish Balay 962899cda47SBarry Smith ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 963899cda47SBarry Smith ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 9648798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 965ae15b995SBarry Smith ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 96646680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 967ae15b995SBarry Smith ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 968bbb85fb3SSatish Balay PetscFunctionReturn(0); 969bbb85fb3SSatish Balay } 970bbb85fb3SSatish Balay 9714a2ae208SSatish Balay #undef __FUNCT__ 9724a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 973dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 974bbb85fb3SSatish Balay { 975bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 97691c97fd4SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 9776849ba73SBarry Smith PetscErrorCode ierr; 978b24ad042SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 979b24ad042SBarry Smith PetscInt *row,*col,other_disassembled; 9807c922b88SBarry Smith PetscTruth r1,r2,r3; 9813eda8832SBarry Smith MatScalar *val; 982bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 983b24ad042SBarry Smith PetscMPIInt n; 984bbb85fb3SSatish Balay 98591c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 986bbb85fb3SSatish Balay PetscFunctionBegin; 987bbb85fb3SSatish Balay if (!baij->donotstash) { 988a2d1c673SSatish Balay while (1) { 9898798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 990a2d1c673SSatish Balay if (!flg) break; 991a2d1c673SSatish Balay 992bbb85fb3SSatish Balay for (i=0; i<n;) { 993bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 994bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995bbb85fb3SSatish Balay if (j < n) ncols = j-i; 996bbb85fb3SSatish Balay else ncols = n-i; 997bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 99893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999bbb85fb3SSatish Balay i = j; 1000bbb85fb3SSatish Balay } 1001bbb85fb3SSatish Balay } 10028798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1004a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1005a2d1c673SSatish Balay restore the original flags */ 1006a2d1c673SSatish Balay r1 = baij->roworiented; 1007a2d1c673SSatish Balay r2 = a->roworiented; 100891c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 10097c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10107c922b88SBarry Smith a->roworiented = PETSC_FALSE; 101191c97fd4SSatish Balay (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1012a2d1c673SSatish Balay while (1) { 10138798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014a2d1c673SSatish Balay if (!flg) break; 1015a2d1c673SSatish Balay 1016a2d1c673SSatish Balay for (i=0; i<n;) { 1017a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1018a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019a2d1c673SSatish Balay if (j < n) ncols = j-i; 1020a2d1c673SSatish Balay else ncols = n-i; 102193fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022a2d1c673SSatish Balay i = j; 1023a2d1c673SSatish Balay } 1024a2d1c673SSatish Balay } 10258798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026a2d1c673SSatish Balay baij->roworiented = r1; 1027a2d1c673SSatish Balay a->roworiented = r2; 102891c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1029bbb85fb3SSatish Balay } 1030bbb85fb3SSatish Balay 1031bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033bbb85fb3SSatish Balay 1034bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1035bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1036bbb85fb3SSatish Balay /* 1037bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1038bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1039bbb85fb3SSatish Balay */ 1040bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1043bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1044bbb85fb3SSatish Balay } 1045bbb85fb3SSatish Balay } 1046bbb85fb3SSatish Balay 1047bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1049bbb85fb3SSatish Balay } 105091c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1051bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1052bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1053bbb85fb3SSatish Balay 10546cf91177SBarry Smith #if defined(PETSC_USE_INFO) 1055bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1056ae15b995SBarry Smith ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1057bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1058bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1059bbb85fb3SSatish Balay } 1060bbb85fb3SSatish Balay #endif 1061bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1062bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1063bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1064bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1065bbb85fb3SSatish Balay } 1066bbb85fb3SSatish Balay 1067606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1068606d414cSSatish Balay baij->rowvalues = 0; 1069bbb85fb3SSatish Balay PetscFunctionReturn(0); 1070bbb85fb3SSatish Balay } 107157b952d6SSatish Balay 10724a2ae208SSatish Balay #undef __FUNCT__ 10734a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 10746849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 107557b952d6SSatish Balay { 107657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1077dfbe8321SBarry Smith PetscErrorCode ierr; 1078b24ad042SBarry Smith PetscMPIInt size = baij->size,rank = baij->rank; 1079899cda47SBarry Smith PetscInt bs = mat->rmap.bs; 108032077d6dSBarry Smith PetscTruth iascii,isdraw; 1081b0a32e0cSBarry Smith PetscViewer sviewer; 1082f3ef73ceSBarry Smith PetscViewerFormat format; 108357b952d6SSatish Balay 1084d64ed03dSBarry Smith PetscFunctionBegin; 108532077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1086fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 108732077d6dSBarry Smith if (iascii) { 1088b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1089456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 10904e220ebcSLois Curfman McInnes MatInfo info; 1091d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1092d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 109377431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1094899cda47SBarry Smith rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1095899cda47SBarry Smith mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1096d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 109777431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1098d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 109977431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1100b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 110157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 11023a40ed3dSBarry Smith PetscFunctionReturn(0); 1103fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 110477431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 11053a40ed3dSBarry Smith PetscFunctionReturn(0); 110604929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 110704929863SHong Zhang PetscFunctionReturn(0); 110857b952d6SSatish Balay } 110957b952d6SSatish Balay } 111057b952d6SSatish Balay 11110f5bd95cSBarry Smith if (isdraw) { 1112b0a32e0cSBarry Smith PetscDraw draw; 111357b952d6SSatish Balay PetscTruth isnull; 1114b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1115b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 111657b952d6SSatish Balay } 111757b952d6SSatish Balay 111857b952d6SSatish Balay if (size == 1) { 1119873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 112057b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1121d64ed03dSBarry Smith } else { 112257b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 112357b952d6SSatish Balay Mat A; 112457b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1125899cda47SBarry Smith PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11263eda8832SBarry Smith MatScalar *a; 112757b952d6SSatish Balay 1128f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1129f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 1130f69a0ea3SMatthew Knepley ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 113157b952d6SSatish Balay if (!rank) { 1132f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1133d64ed03dSBarry Smith } else { 1134f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 113557b952d6SSatish Balay } 1136f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1137899cda47SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 113852e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 113957b952d6SSatish Balay 114057b952d6SSatish Balay /* copy over the A part */ 114157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 114257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1143b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 114457b952d6SSatish Balay 114557b952d6SSatish Balay for (i=0; i<mbs; i++) { 1146899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 114757b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 114857b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1149899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 115057b952d6SSatish Balay for (k=0; k<bs; k++) { 115193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1152cee3aa6bSSatish Balay col++; a += bs; 115357b952d6SSatish Balay } 115457b952d6SSatish Balay } 115557b952d6SSatish Balay } 115657b952d6SSatish Balay /* copy over the B part */ 115757b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 115857b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 115957b952d6SSatish Balay for (i=0; i<mbs; i++) { 1160899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 116157b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 116257b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 116357b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 116457b952d6SSatish Balay for (k=0; k<bs; k++) { 116593fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1166cee3aa6bSSatish Balay col++; a += bs; 116757b952d6SSatish Balay } 116857b952d6SSatish Balay } 116957b952d6SSatish Balay } 1170606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11716d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11726d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 117355843e3eSBarry Smith /* 117455843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1175b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 117655843e3eSBarry Smith */ 1177b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178f14a1c24SBarry Smith if (!rank) { 1179e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11806831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 118157b952d6SSatish Balay } 1182b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 118357b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 118457b952d6SSatish Balay } 11853a40ed3dSBarry Smith PetscFunctionReturn(0); 118657b952d6SSatish Balay } 118757b952d6SSatish Balay 11884a2ae208SSatish Balay #undef __FUNCT__ 11894a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1190dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 119157b952d6SSatish Balay { 1192dfbe8321SBarry Smith PetscErrorCode ierr; 119332077d6dSBarry Smith PetscTruth iascii,isdraw,issocket,isbinary; 119457b952d6SSatish Balay 1195d64ed03dSBarry Smith PetscFunctionBegin; 119632077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1197fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1198b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1199fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 120032077d6dSBarry Smith if (iascii || isdraw || issocket || isbinary) { 12017b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 12025cd90555SBarry Smith } else { 120379a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 120457b952d6SSatish Balay } 12053a40ed3dSBarry Smith PetscFunctionReturn(0); 120657b952d6SSatish Balay } 120757b952d6SSatish Balay 12084a2ae208SSatish Balay #undef __FUNCT__ 12094a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1210dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 121179bdfe76SSatish Balay { 121279bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1213dfbe8321SBarry Smith PetscErrorCode ierr; 121479bdfe76SSatish Balay 1215d64ed03dSBarry Smith PetscFunctionBegin; 1216aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1217899cda47SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 121879bdfe76SSatish Balay #endif 12198798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12208798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 122179bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 122279bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1223aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12240f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 122548e59246SSatish Balay #else 122605b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 122748e59246SSatish Balay #endif 122805b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1229606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1230606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 123105b42c5fSBarry Smith ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 123205b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 123305b42c5fSBarry Smith ierr = PetscFree(baij->hd);CHKERRQ(ierr); 12346fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 123505b42c5fSBarry Smith ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 12366fa18ffdSBarry Smith #endif 1237899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1238606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1239901853e0SKris Buschelman 1240dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1241901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1242901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1243901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1244901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1245aac34f13SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1246901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1247901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 12483a40ed3dSBarry Smith PetscFunctionReturn(0); 124979bdfe76SSatish Balay } 125079bdfe76SSatish Balay 12514a2ae208SSatish Balay #undef __FUNCT__ 12524a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1253dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1254cee3aa6bSSatish Balay { 1255cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1256dfbe8321SBarry Smith PetscErrorCode ierr; 1257b24ad042SBarry Smith PetscInt nt; 1258cee3aa6bSSatish Balay 1259d64ed03dSBarry Smith PetscFunctionBegin; 1260e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1261899cda47SBarry Smith if (nt != A->cmap.n) { 126229bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 126347b4a8eaSLois Curfman McInnes } 1264e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1265899cda47SBarry Smith if (nt != A->rmap.n) { 126629bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 126747b4a8eaSLois Curfman McInnes } 126843a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1269f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 127043a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1271f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 12723a40ed3dSBarry Smith PetscFunctionReturn(0); 1273cee3aa6bSSatish Balay } 1274cee3aa6bSSatish Balay 12754a2ae208SSatish Balay #undef __FUNCT__ 12764a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1277dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1278cee3aa6bSSatish Balay { 1279cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1280dfbe8321SBarry Smith PetscErrorCode ierr; 1281d64ed03dSBarry Smith 1282d64ed03dSBarry Smith PetscFunctionBegin; 128343a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1284f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 128543a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1286f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12873a40ed3dSBarry Smith PetscFunctionReturn(0); 1288cee3aa6bSSatish Balay } 1289cee3aa6bSSatish Balay 12904a2ae208SSatish Balay #undef __FUNCT__ 12914a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1292dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1293cee3aa6bSSatish Balay { 1294cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1295dfbe8321SBarry Smith PetscErrorCode ierr; 1296a5ff213dSBarry Smith PetscTruth merged; 1297cee3aa6bSSatish Balay 1298d64ed03dSBarry Smith PetscFunctionBegin; 1299a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1300cee3aa6bSSatish Balay /* do nondiagonal part */ 13017c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1302a5ff213dSBarry Smith if (!merged) { 1303cee3aa6bSSatish Balay /* send it on its way */ 1304537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1305cee3aa6bSSatish Balay /* do local part */ 13067c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1307cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1308a5ff213dSBarry Smith /* inserted in yy until the next line */ 1309639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1310a5ff213dSBarry Smith } else { 1311a5ff213dSBarry Smith /* do local part */ 1312a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1313a5ff213dSBarry Smith /* send it on its way */ 1314a5ff213dSBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1315a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 1316a5ff213dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1317a5ff213dSBarry Smith } 13183a40ed3dSBarry Smith PetscFunctionReturn(0); 1319cee3aa6bSSatish Balay } 1320cee3aa6bSSatish Balay 13214a2ae208SSatish Balay #undef __FUNCT__ 13224a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1323dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1324cee3aa6bSSatish Balay { 1325cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1326dfbe8321SBarry Smith PetscErrorCode ierr; 1327cee3aa6bSSatish Balay 1328d64ed03dSBarry Smith PetscFunctionBegin; 1329cee3aa6bSSatish Balay /* do nondiagonal part */ 13307c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1331cee3aa6bSSatish Balay /* send it on its way */ 1332537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1333cee3aa6bSSatish Balay /* do local part */ 13347c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1335cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1336cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1337cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1338537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13393a40ed3dSBarry Smith PetscFunctionReturn(0); 1340cee3aa6bSSatish Balay } 1341cee3aa6bSSatish Balay 1342cee3aa6bSSatish Balay /* 1343cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1344cee3aa6bSSatish Balay diagonal block 1345cee3aa6bSSatish Balay */ 13464a2ae208SSatish Balay #undef __FUNCT__ 13474a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1348dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1349cee3aa6bSSatish Balay { 1350cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1351dfbe8321SBarry Smith PetscErrorCode ierr; 1352d64ed03dSBarry Smith 1353d64ed03dSBarry Smith PetscFunctionBegin; 1354899cda47SBarry Smith if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13553a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13563a40ed3dSBarry Smith PetscFunctionReturn(0); 1357cee3aa6bSSatish Balay } 1358cee3aa6bSSatish Balay 13594a2ae208SSatish Balay #undef __FUNCT__ 13604a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1361f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1362cee3aa6bSSatish Balay { 1363cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1364dfbe8321SBarry Smith PetscErrorCode ierr; 1365d64ed03dSBarry Smith 1366d64ed03dSBarry Smith PetscFunctionBegin; 1367f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1368f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 13693a40ed3dSBarry Smith PetscFunctionReturn(0); 1370cee3aa6bSSatish Balay } 1371026e39d0SSatish Balay 13724a2ae208SSatish Balay #undef __FUNCT__ 13734a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1374b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1375acdf5bf4SSatish Balay { 1376acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 137787828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13786849ba73SBarry Smith PetscErrorCode ierr; 1379899cda47SBarry Smith PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1380899cda47SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1381899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1382acdf5bf4SSatish Balay 1383d64ed03dSBarry Smith PetscFunctionBegin; 1384abc0a331SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1385acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1386acdf5bf4SSatish Balay 1387acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1388acdf5bf4SSatish Balay /* 1389acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1390acdf5bf4SSatish Balay */ 1391acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1392b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1393bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1394acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1395acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1396acdf5bf4SSatish Balay } 1397b24ad042SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1398b24ad042SBarry Smith mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1399acdf5bf4SSatish Balay } 1400acdf5bf4SSatish Balay 140129bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1402d9d09a02SSatish Balay lrow = row - brstart; 1403acdf5bf4SSatish Balay 1404acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1405acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1406acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1407f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1408f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1409acdf5bf4SSatish Balay nztot = nzA + nzB; 1410acdf5bf4SSatish Balay 1411acdf5bf4SSatish Balay cmap = mat->garray; 1412acdf5bf4SSatish Balay if (v || idx) { 1413acdf5bf4SSatish Balay if (nztot) { 1414acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1415b24ad042SBarry Smith PetscInt imark = -1; 1416acdf5bf4SSatish Balay if (v) { 1417acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1418acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1419d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1420acdf5bf4SSatish Balay else break; 1421acdf5bf4SSatish Balay } 1422acdf5bf4SSatish Balay imark = i; 1423acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1424acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1425acdf5bf4SSatish Balay } 1426acdf5bf4SSatish Balay if (idx) { 1427acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1428acdf5bf4SSatish Balay if (imark > -1) { 1429acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1430bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1431acdf5bf4SSatish Balay } 1432acdf5bf4SSatish Balay } else { 1433acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1434d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1435d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1436acdf5bf4SSatish Balay else break; 1437acdf5bf4SSatish Balay } 1438acdf5bf4SSatish Balay imark = i; 1439acdf5bf4SSatish Balay } 1440d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1441d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1442acdf5bf4SSatish Balay } 1443d64ed03dSBarry Smith } else { 1444d212a18eSSatish Balay if (idx) *idx = 0; 1445d212a18eSSatish Balay if (v) *v = 0; 1446d212a18eSSatish Balay } 1447acdf5bf4SSatish Balay } 1448acdf5bf4SSatish Balay *nz = nztot; 1449f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14513a40ed3dSBarry Smith PetscFunctionReturn(0); 1452acdf5bf4SSatish Balay } 1453acdf5bf4SSatish Balay 14544a2ae208SSatish Balay #undef __FUNCT__ 14554a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1456b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1457acdf5bf4SSatish Balay { 1458acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1459d64ed03dSBarry Smith 1460d64ed03dSBarry Smith PetscFunctionBegin; 1461abc0a331SBarry Smith if (!baij->getrowactive) { 146229bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1463acdf5bf4SSatish Balay } 1464acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14653a40ed3dSBarry Smith PetscFunctionReturn(0); 1466acdf5bf4SSatish Balay } 1467acdf5bf4SSatish Balay 14684a2ae208SSatish Balay #undef __FUNCT__ 14694a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1470dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 147158667388SSatish Balay { 147258667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1473dfbe8321SBarry Smith PetscErrorCode ierr; 1474d64ed03dSBarry Smith 1475d64ed03dSBarry Smith PetscFunctionBegin; 147658667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 147758667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14783a40ed3dSBarry Smith PetscFunctionReturn(0); 147958667388SSatish Balay } 14800ac07820SSatish Balay 14814a2ae208SSatish Balay #undef __FUNCT__ 14824a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1483dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14840ac07820SSatish Balay { 14854e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14864e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1487dfbe8321SBarry Smith PetscErrorCode ierr; 1488329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14890ac07820SSatish Balay 1490d64ed03dSBarry Smith PetscFunctionBegin; 1491899cda47SBarry Smith info->block_size = (PetscReal)matin->rmap.bs; 14924e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14930e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1494de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14954e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14960e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1497de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14980ac07820SSatish Balay if (flag == MAT_LOCAL) { 14994e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15004e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 15014e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15024e220ebcSLois Curfman McInnes info->memory = isend[3]; 15034e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15040ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1505d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 15064e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15074e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15084e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15094e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15104e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15110ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1512d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 15134e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15144e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15154e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15164e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15174e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1518d41123aaSBarry Smith } else { 151977431f27SBarry Smith SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 15200ac07820SSatish Balay } 1521899cda47SBarry Smith info->rows_global = (PetscReal)A->rmap.N; 1522899cda47SBarry Smith info->columns_global = (PetscReal)A->cmap.N; 1523899cda47SBarry Smith info->rows_local = (PetscReal)A->rmap.N; 1524899cda47SBarry Smith info->columns_local = (PetscReal)A->cmap.N; 15254e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15264e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15274e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15283a40ed3dSBarry Smith PetscFunctionReturn(0); 15290ac07820SSatish Balay } 15300ac07820SSatish Balay 15314a2ae208SSatish Balay #undef __FUNCT__ 15324a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1533dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 153458667388SSatish Balay { 153558667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1536dfbe8321SBarry Smith PetscErrorCode ierr; 153758667388SSatish Balay 1538d64ed03dSBarry Smith PetscFunctionBegin; 153912c028f9SKris Buschelman switch (op) { 154012c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 154112c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 154212c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 154312c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 154412c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 154512c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 154612c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 154798305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154898305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 154912c028f9SKris Buschelman break; 155012c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15517c922b88SBarry Smith a->roworiented = PETSC_TRUE; 155298305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155398305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155412c028f9SKris Buschelman break; 155512c028f9SKris Buschelman case MAT_ROWS_SORTED: 155612c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 155712c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 1558290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 155912c028f9SKris Buschelman break; 156012c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15617c922b88SBarry Smith a->roworiented = PETSC_FALSE; 156298305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 156398305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 156412c028f9SKris Buschelman break; 156512c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15667c922b88SBarry Smith a->donotstash = PETSC_TRUE; 156712c028f9SKris Buschelman break; 156812c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 156929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 157012c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15717c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 157212c028f9SKris Buschelman break; 157377e54ba9SKris Buschelman case MAT_SYMMETRIC: 157477e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15752188ac68SBarry Smith case MAT_HERMITIAN: 15762188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 15772188ac68SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 15782188ac68SBarry Smith break; 15799a4540c5SBarry Smith case MAT_NOT_SYMMETRIC: 15809a4540c5SBarry Smith case MAT_NOT_STRUCTURALLY_SYMMETRIC: 15819a4540c5SBarry Smith case MAT_NOT_HERMITIAN: 15829a4540c5SBarry Smith case MAT_NOT_SYMMETRY_ETERNAL: 1583290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 158477e54ba9SKris Buschelman break; 158512c028f9SKris Buschelman default: 1586ad86a440SBarry Smith SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1587d64ed03dSBarry Smith } 15883a40ed3dSBarry Smith PetscFunctionReturn(0); 158958667388SSatish Balay } 159058667388SSatish Balay 15914a2ae208SSatish Balay #undef __FUNCT__ 15924a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1593dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15940ac07820SSatish Balay { 15950ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15960ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15970ac07820SSatish Balay Mat B; 1598dfbe8321SBarry Smith PetscErrorCode ierr; 1599899cda47SBarry Smith PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1600899cda47SBarry Smith PetscInt bs=A->rmap.bs,mbs=baij->mbs; 16013eda8832SBarry Smith MatScalar *a; 16020ac07820SSatish Balay 1603d64ed03dSBarry Smith PetscFunctionBegin; 160429bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1605f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1606899cda47SBarry Smith ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1607f204ca49SKris Buschelman ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1608899cda47SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 16090ac07820SSatish Balay 16100ac07820SSatish Balay /* copy over the A part */ 16110ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16120ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1613b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 16140ac07820SSatish Balay 16150ac07820SSatish Balay for (i=0; i<mbs; i++) { 1616899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 16170ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16180ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1619899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 16200ac07820SSatish Balay for (k=0; k<bs; k++) { 162193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16220ac07820SSatish Balay col++; a += bs; 16230ac07820SSatish Balay } 16240ac07820SSatish Balay } 16250ac07820SSatish Balay } 16260ac07820SSatish Balay /* copy over the B part */ 16270ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16280ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16290ac07820SSatish Balay for (i=0; i<mbs; i++) { 1630899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 16310ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16320ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16330ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16340ac07820SSatish Balay for (k=0; k<bs; k++) { 163593fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16360ac07820SSatish Balay col++; a += bs; 16370ac07820SSatish Balay } 16380ac07820SSatish Balay } 16390ac07820SSatish Balay } 1640606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16410ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16420ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16430ac07820SSatish Balay 16447c922b88SBarry Smith if (matout) { 16450ac07820SSatish Balay *matout = B; 16460ac07820SSatish Balay } else { 1647273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16480ac07820SSatish Balay } 16493a40ed3dSBarry Smith PetscFunctionReturn(0); 16500ac07820SSatish Balay } 16510e95ebc0SSatish Balay 16524a2ae208SSatish Balay #undef __FUNCT__ 16534a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1654dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16550e95ebc0SSatish Balay { 165636c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 165736c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1658dfbe8321SBarry Smith PetscErrorCode ierr; 1659b24ad042SBarry Smith PetscInt s1,s2,s3; 16600e95ebc0SSatish Balay 1661d64ed03dSBarry Smith PetscFunctionBegin; 166236c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 166336c4a09eSSatish Balay if (rr) { 166436c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 166529bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 166636c4a09eSSatish Balay /* Overlap communication with computation. */ 166736c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 166836c4a09eSSatish Balay } 16690e95ebc0SSatish Balay if (ll) { 16700e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 167129bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1672a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16730e95ebc0SSatish Balay } 167436c4a09eSSatish Balay /* scale the diagonal block */ 167536c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 167636c4a09eSSatish Balay 167736c4a09eSSatish Balay if (rr) { 167836c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 167936c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1680a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 168136c4a09eSSatish Balay } 168236c4a09eSSatish Balay 16833a40ed3dSBarry Smith PetscFunctionReturn(0); 16840e95ebc0SSatish Balay } 16850e95ebc0SSatish Balay 16864a2ae208SSatish Balay #undef __FUNCT__ 16874a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1688f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 16890ac07820SSatish Balay { 16900ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16916849ba73SBarry Smith PetscErrorCode ierr; 1692b24ad042SBarry Smith PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1693899cda47SBarry Smith PetscInt i,*owners = A->rmap.range; 1694b24ad042SBarry Smith PetscInt *nprocs,j,idx,nsends,row; 1695b24ad042SBarry Smith PetscInt nmax,*svalues,*starts,*owner,nrecvs; 16966543fbbaSBarry Smith PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1697357c27ecSBarry Smith PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 16980ac07820SSatish Balay MPI_Comm comm = A->comm; 16990ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17000ac07820SSatish Balay MPI_Status recv_status,*send_status; 17016543fbbaSBarry Smith #if defined(PETSC_DEBUG) 17026543fbbaSBarry Smith PetscTruth found = PETSC_FALSE; 17036543fbbaSBarry Smith #endif 17040ac07820SSatish Balay 1705d64ed03dSBarry Smith PetscFunctionBegin; 17060ac07820SSatish Balay /* first count number of contributors to each processor */ 1707b24ad042SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1708b24ad042SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1709b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 17106543fbbaSBarry Smith j = 0; 17110ac07820SSatish Balay for (i=0; i<N; i++) { 17126543fbbaSBarry Smith if (lastidx > (idx = rows[i])) j = 0; 17136543fbbaSBarry Smith lastidx = idx; 17146543fbbaSBarry Smith for (; j<size; j++) { 1715357c27ecSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 17166543fbbaSBarry Smith nprocs[2*j]++; 17176543fbbaSBarry Smith nprocs[2*j+1] = 1; 17186543fbbaSBarry Smith owner[i] = j; 17196543fbbaSBarry Smith #if defined(PETSC_DEBUG) 17206543fbbaSBarry Smith found = PETSC_TRUE; 17216543fbbaSBarry Smith #endif 17226543fbbaSBarry Smith break; 17230ac07820SSatish Balay } 17240ac07820SSatish Balay } 17256543fbbaSBarry Smith #if defined(PETSC_DEBUG) 172629bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 17276543fbbaSBarry Smith found = PETSC_FALSE; 17286543fbbaSBarry Smith #endif 17290ac07820SSatish Balay } 1730c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 17310ac07820SSatish Balay 17320ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1733c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 17340ac07820SSatish Balay 17350ac07820SSatish Balay /* post receives: */ 1736b24ad042SBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1737b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17380ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1739b24ad042SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17400ac07820SSatish Balay } 17410ac07820SSatish Balay 17420ac07820SSatish Balay /* do sends: 17430ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17440ac07820SSatish Balay the ith processor 17450ac07820SSatish Balay */ 1746b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1747b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1748b24ad042SBarry Smith ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 17490ac07820SSatish Balay starts[0] = 0; 1750c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17510ac07820SSatish Balay for (i=0; i<N; i++) { 17520ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17530ac07820SSatish Balay } 17540ac07820SSatish Balay 17550ac07820SSatish Balay starts[0] = 0; 1756c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17570ac07820SSatish Balay count = 0; 17580ac07820SSatish Balay for (i=0; i<size; i++) { 1759c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1760b24ad042SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17610ac07820SSatish Balay } 17620ac07820SSatish Balay } 1763606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17640ac07820SSatish Balay 1765357c27ecSBarry Smith base = owners[rank]; 17660ac07820SSatish Balay 17670ac07820SSatish Balay /* wait on receives */ 1768b24ad042SBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 17690ac07820SSatish Balay source = lens + nrecvs; 17700ac07820SSatish Balay count = nrecvs; slen = 0; 17710ac07820SSatish Balay while (count) { 1772ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17730ac07820SSatish Balay /* unpack receives into our local space */ 1774b24ad042SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 17750ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17760ac07820SSatish Balay lens[imdex] = n; 17770ac07820SSatish Balay slen += n; 17780ac07820SSatish Balay count--; 17790ac07820SSatish Balay } 1780606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17810ac07820SSatish Balay 17820ac07820SSatish Balay /* move the data into the send scatter */ 1783b24ad042SBarry Smith ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 17840ac07820SSatish Balay count = 0; 17850ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17860ac07820SSatish Balay values = rvalues + i*nmax; 17870ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17880ac07820SSatish Balay lrows[count++] = values[j] - base; 17890ac07820SSatish Balay } 17900ac07820SSatish Balay } 1791606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1792606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1793606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1794606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17950ac07820SSatish Balay 17960ac07820SSatish Balay /* actually zap the local rows */ 179772dacd9aSBarry Smith /* 179872dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 1799a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 180072dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 180172dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 180272dacd9aSBarry Smith 1803f4df32b1SMatthew Knepley Contributed by: Matthew Knepley 180472dacd9aSBarry Smith */ 18059c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1806f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1807899cda47SBarry Smith if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1808f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1809f4df32b1SMatthew Knepley } else if (diag != 0.0) { 1810f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1811fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 181229bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1813fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18146525c446SSatish Balay } 1815a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1816a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1817f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1818a07cd24cSSatish Balay } 1819a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18219c957beeSSatish Balay } else { 1822f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1823a07cd24cSSatish Balay } 18249c957beeSSatish Balay 1825606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1826a07cd24cSSatish Balay 18270ac07820SSatish Balay /* wait on sends */ 18280ac07820SSatish Balay if (nsends) { 182982502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1830ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1831606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18320ac07820SSatish Balay } 1833606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1834606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18350ac07820SSatish Balay 18363a40ed3dSBarry Smith PetscFunctionReturn(0); 18370ac07820SSatish Balay } 183872dacd9aSBarry Smith 18394a2ae208SSatish Balay #undef __FUNCT__ 18404a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1841dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1842bb5a7306SBarry Smith { 1843bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1844dfbe8321SBarry Smith PetscErrorCode ierr; 1845d64ed03dSBarry Smith 1846d64ed03dSBarry Smith PetscFunctionBegin; 1847bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18483a40ed3dSBarry Smith PetscFunctionReturn(0); 1849bb5a7306SBarry Smith } 1850bb5a7306SBarry Smith 18516849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18520ac07820SSatish Balay 18534a2ae208SSatish Balay #undef __FUNCT__ 18544a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1855dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18567fc3c18eSBarry Smith { 18577fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18587fc3c18eSBarry Smith Mat a,b,c,d; 18597fc3c18eSBarry Smith PetscTruth flg; 1860dfbe8321SBarry Smith PetscErrorCode ierr; 18617fc3c18eSBarry Smith 18627fc3c18eSBarry Smith PetscFunctionBegin; 18637fc3c18eSBarry Smith a = matA->A; b = matA->B; 18647fc3c18eSBarry Smith c = matB->A; d = matB->B; 18657fc3c18eSBarry Smith 18667fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1867abc0a331SBarry Smith if (flg) { 18687fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18697fc3c18eSBarry Smith } 18707fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18717fc3c18eSBarry Smith PetscFunctionReturn(0); 18727fc3c18eSBarry Smith } 18737fc3c18eSBarry Smith 18743c896bc6SHong Zhang #undef __FUNCT__ 18753c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ" 18763c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 18773c896bc6SHong Zhang { 18783c896bc6SHong Zhang PetscErrorCode ierr; 18793c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 18803c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 18813c896bc6SHong Zhang 18823c896bc6SHong Zhang PetscFunctionBegin; 18833c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 18843c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 18853c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 18863c896bc6SHong Zhang } else { 18873c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 18883c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 18893c896bc6SHong Zhang } 18903c896bc6SHong Zhang PetscFunctionReturn(0); 18913c896bc6SHong Zhang } 1892273d9f13SBarry Smith 18934a2ae208SSatish Balay #undef __FUNCT__ 18944a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1895dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1896273d9f13SBarry Smith { 1897dfbe8321SBarry Smith PetscErrorCode ierr; 1898273d9f13SBarry Smith 1899273d9f13SBarry Smith PetscFunctionBegin; 19007edd0491SSatish Balay ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1901273d9f13SBarry Smith PetscFunctionReturn(0); 1902273d9f13SBarry Smith } 1903273d9f13SBarry Smith 19044fe895cdSHong Zhang #include "petscblaslapack.h" 19054fe895cdSHong Zhang #undef __FUNCT__ 19064fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ" 19074fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 19084fe895cdSHong Zhang { 19094fe895cdSHong Zhang PetscErrorCode ierr; 19104fe895cdSHong Zhang Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 19114fe895cdSHong Zhang PetscBLASInt bnz,one=1; 19124fe895cdSHong Zhang Mat_SeqBAIJ *x,*y; 19134fe895cdSHong Zhang 19144fe895cdSHong Zhang PetscFunctionBegin; 19154fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 19164fe895cdSHong Zhang PetscScalar alpha = a; 19174fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->A->data; 19184fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->A->data; 19194fe895cdSHong Zhang bnz = (PetscBLASInt)x->nz; 19204fe895cdSHong Zhang BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 19214fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->B->data; 19224fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->B->data; 19234fe895cdSHong Zhang bnz = (PetscBLASInt)x->nz; 19244fe895cdSHong Zhang BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 19254fe895cdSHong Zhang } else { 19264fe895cdSHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 19274fe895cdSHong Zhang } 19284fe895cdSHong Zhang PetscFunctionReturn(0); 19294fe895cdSHong Zhang } 19304fe895cdSHong Zhang 193199cafbc1SBarry Smith #undef __FUNCT__ 193299cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ" 193399cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 193499cafbc1SBarry Smith { 193599cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 193699cafbc1SBarry Smith PetscErrorCode ierr; 193799cafbc1SBarry Smith 193899cafbc1SBarry Smith PetscFunctionBegin; 193999cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 194099cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 194199cafbc1SBarry Smith PetscFunctionReturn(0); 194299cafbc1SBarry Smith } 194399cafbc1SBarry Smith 194499cafbc1SBarry Smith #undef __FUNCT__ 194599cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 194699cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 194799cafbc1SBarry Smith { 194899cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 194999cafbc1SBarry Smith PetscErrorCode ierr; 195099cafbc1SBarry Smith 195199cafbc1SBarry Smith PetscFunctionBegin; 195299cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 195399cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 195499cafbc1SBarry Smith PetscFunctionReturn(0); 195599cafbc1SBarry Smith } 195699cafbc1SBarry Smith 195779bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1958cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1959cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1960cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1961cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1962cc2dc46cSBarry Smith MatMult_MPIBAIJ, 196397304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 19647c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19657c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1966cc2dc46cSBarry Smith 0, 1967cc2dc46cSBarry Smith 0, 1968cc2dc46cSBarry Smith 0, 196997304618SKris Buschelman /*10*/ 0, 1970cc2dc46cSBarry Smith 0, 1971cc2dc46cSBarry Smith 0, 1972cc2dc46cSBarry Smith 0, 1973cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 197497304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 19757fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1976cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1977cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1978cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 197997304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 1980cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1981cc2dc46cSBarry Smith 0, 1982cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1983cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 198497304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ, 1985cc2dc46cSBarry Smith 0, 1986cc2dc46cSBarry Smith 0, 1987cc2dc46cSBarry Smith 0, 1988cc2dc46cSBarry Smith 0, 198997304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ, 1990273d9f13SBarry Smith 0, 1991cc2dc46cSBarry Smith 0, 1992cc2dc46cSBarry Smith 0, 1993cc2dc46cSBarry Smith 0, 199497304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ, 1995cc2dc46cSBarry Smith 0, 1996cc2dc46cSBarry Smith 0, 1997cc2dc46cSBarry Smith 0, 1998cc2dc46cSBarry Smith 0, 19994fe895cdSHong Zhang /*40*/ MatAXPY_MPIBAIJ, 2000cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 2001cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 2002cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 20033c896bc6SHong Zhang MatCopy_MPIBAIJ, 20048c07d4e3SBarry Smith /*45*/ 0, 2005cc2dc46cSBarry Smith MatScale_MPIBAIJ, 2006cc2dc46cSBarry Smith 0, 2007cc2dc46cSBarry Smith 0, 2008cc2dc46cSBarry Smith 0, 2009521d7252SBarry Smith /*50*/ 0, 2010cc2dc46cSBarry Smith 0, 2011cc2dc46cSBarry Smith 0, 2012cc2dc46cSBarry Smith 0, 2013cc2dc46cSBarry Smith 0, 201497304618SKris Buschelman /*55*/ 0, 2015cc2dc46cSBarry Smith 0, 2016cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 2017cc2dc46cSBarry Smith 0, 2018cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 201997304618SKris Buschelman /*60*/ 0, 2020f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2021f14a1c24SBarry Smith MatView_MPIBAIJ, 2022357abbc8SBarry Smith 0, 20237843d17aSBarry Smith 0, 202497304618SKris Buschelman /*65*/ 0, 20257843d17aSBarry Smith 0, 20267843d17aSBarry Smith 0, 20277843d17aSBarry Smith 0, 20287843d17aSBarry Smith 0, 202997304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ, 20307843d17aSBarry Smith 0, 203197304618SKris Buschelman 0, 203297304618SKris Buschelman 0, 203397304618SKris Buschelman 0, 203497304618SKris Buschelman /*75*/ 0, 203597304618SKris Buschelman 0, 203697304618SKris Buschelman 0, 203797304618SKris Buschelman 0, 203897304618SKris Buschelman 0, 203997304618SKris Buschelman /*80*/ 0, 204097304618SKris Buschelman 0, 204197304618SKris Buschelman 0, 204297304618SKris Buschelman 0, 2043865e5f61SKris Buschelman MatLoad_MPIBAIJ, 2044865e5f61SKris Buschelman /*85*/ 0, 2045865e5f61SKris Buschelman 0, 2046865e5f61SKris Buschelman 0, 2047865e5f61SKris Buschelman 0, 2048865e5f61SKris Buschelman 0, 2049865e5f61SKris Buschelman /*90*/ 0, 2050865e5f61SKris Buschelman 0, 2051865e5f61SKris Buschelman 0, 2052865e5f61SKris Buschelman 0, 2053865e5f61SKris Buschelman 0, 2054865e5f61SKris Buschelman /*95*/ 0, 2055865e5f61SKris Buschelman 0, 2056865e5f61SKris Buschelman 0, 205799cafbc1SBarry Smith 0, 205899cafbc1SBarry Smith 0, 205999cafbc1SBarry Smith /*100*/0, 206099cafbc1SBarry Smith 0, 206199cafbc1SBarry Smith 0, 206299cafbc1SBarry Smith 0, 206399cafbc1SBarry Smith 0, 206499cafbc1SBarry Smith /*105*/0, 206599cafbc1SBarry Smith MatRealPart_MPIBAIJ, 206699cafbc1SBarry Smith MatImaginaryPart_MPIBAIJ}; 206779bdfe76SSatish Balay 20685ef9f2a5SBarry Smith 2069e18c124aSSatish Balay EXTERN_C_BEGIN 20704a2ae208SSatish Balay #undef __FUNCT__ 20714a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2072be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 20735ef9f2a5SBarry Smith { 20745ef9f2a5SBarry Smith PetscFunctionBegin; 20755ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 20765ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 20775ef9f2a5SBarry Smith PetscFunctionReturn(0); 20785ef9f2a5SBarry Smith } 2079e18c124aSSatish Balay EXTERN_C_END 208079bdfe76SSatish Balay 2081273d9f13SBarry Smith EXTERN_C_BEGIN 2082f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2083d94109b8SHong Zhang EXTERN_C_END 2084d94109b8SHong Zhang 2085aac34f13SBarry Smith #undef __FUNCT__ 2086aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2087b7940d39SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2088aac34f13SBarry Smith { 2089899cda47SBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2090899cda47SBarry Smith PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2091899cda47SBarry Smith PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2092aac34f13SBarry Smith const PetscInt *JJ; 2093aac34f13SBarry Smith PetscScalar *values; 2094aac34f13SBarry Smith PetscErrorCode ierr; 2095aac34f13SBarry Smith 2096aac34f13SBarry Smith PetscFunctionBegin; 2097aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2098b7940d39SSatish Balay if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2099aac34f13SBarry Smith #endif 2100aac34f13SBarry Smith ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2101aac34f13SBarry Smith o_nnz = d_nnz + m; 2102aac34f13SBarry Smith 2103aac34f13SBarry Smith for (i=0; i<m; i++) { 2104b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 2105b7940d39SSatish Balay JJ = J + Ii[i]; 2106aac34f13SBarry Smith nnz_max = PetscMax(nnz_max,nnz); 2107aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2108aac34f13SBarry Smith if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2109aac34f13SBarry Smith #endif 2110aac34f13SBarry Smith for (j=0; j<nnz; j++) { 2111aac34f13SBarry Smith if (*JJ >= cstart) break; 2112aac34f13SBarry Smith JJ++; 2113aac34f13SBarry Smith } 2114aac34f13SBarry Smith d = 0; 2115aac34f13SBarry Smith for (; j<nnz; j++) { 2116aac34f13SBarry Smith if (*JJ++ >= cend) break; 2117aac34f13SBarry Smith d++; 2118aac34f13SBarry Smith } 2119aac34f13SBarry Smith d_nnz[i] = d; 2120aac34f13SBarry Smith o_nnz[i] = nnz - d; 2121aac34f13SBarry Smith } 2122aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123aac34f13SBarry Smith ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2124aac34f13SBarry Smith 2125aac34f13SBarry Smith if (v) values = (PetscScalar*)v; 2126aac34f13SBarry Smith else { 2127aac34f13SBarry Smith ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2128aac34f13SBarry Smith ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2129aac34f13SBarry Smith } 2130aac34f13SBarry Smith 2131aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2132aac34f13SBarry Smith for (i=0; i<m; i++) { 2133aac34f13SBarry Smith ii = i + rstart; 2134b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 2135b7940d39SSatish Balay ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2136aac34f13SBarry Smith } 2137aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2138aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2139aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2140aac34f13SBarry Smith 2141aac34f13SBarry Smith if (!v) { 2142aac34f13SBarry Smith ierr = PetscFree(values);CHKERRQ(ierr); 2143aac34f13SBarry Smith } 2144aac34f13SBarry Smith PetscFunctionReturn(0); 2145aac34f13SBarry Smith } 2146aac34f13SBarry Smith 2147aac34f13SBarry Smith #undef __FUNCT__ 2148aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2149aac34f13SBarry Smith /*@C 2150aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2151aac34f13SBarry Smith (the default parallel PETSc format). 2152aac34f13SBarry Smith 2153aac34f13SBarry Smith Collective on MPI_Comm 2154aac34f13SBarry Smith 2155aac34f13SBarry Smith Input Parameters: 2156aac34f13SBarry Smith + A - the matrix 2157aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2158aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2159aac34f13SBarry Smith - v - optional values in the matrix 2160aac34f13SBarry Smith 2161aac34f13SBarry Smith Level: developer 2162aac34f13SBarry Smith 2163aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 2164aac34f13SBarry Smith 2165aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2166aac34f13SBarry Smith @*/ 2167be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2168aac34f13SBarry Smith { 2169aac34f13SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2170aac34f13SBarry Smith 2171aac34f13SBarry Smith PetscFunctionBegin; 2172aac34f13SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2173aac34f13SBarry Smith if (f) { 2174aac34f13SBarry Smith ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2175aac34f13SBarry Smith } 2176aac34f13SBarry Smith PetscFunctionReturn(0); 2177aac34f13SBarry Smith } 2178aac34f13SBarry Smith 2179d94109b8SHong Zhang EXTERN_C_BEGIN 21804a2ae208SSatish Balay #undef __FUNCT__ 2181a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2182be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2183a23d5eceSKris Buschelman { 2184a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2185dfbe8321SBarry Smith PetscErrorCode ierr; 2186b24ad042SBarry Smith PetscInt i; 2187a23d5eceSKris Buschelman 2188a23d5eceSKris Buschelman PetscFunctionBegin; 2189a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 21908c07d4e3SBarry Smith ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 21918c07d4e3SBarry Smith ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 21928c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2193a23d5eceSKris Buschelman 2194a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2195a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2196a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 219777431f27SBarry Smith if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 219877431f27SBarry Smith if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2199899cda47SBarry Smith 2200899cda47SBarry Smith B->rmap.bs = bs; 2201899cda47SBarry Smith B->cmap.bs = bs; 2202899cda47SBarry Smith ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2203899cda47SBarry Smith ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2204899cda47SBarry Smith 2205a23d5eceSKris Buschelman if (d_nnz) { 2206899cda47SBarry Smith for (i=0; i<B->rmap.n/bs; i++) { 220777431f27SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2208a23d5eceSKris Buschelman } 2209a23d5eceSKris Buschelman } 2210a23d5eceSKris Buschelman if (o_nnz) { 2211899cda47SBarry Smith for (i=0; i<B->rmap.n/bs; i++) { 221277431f27SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2213a23d5eceSKris Buschelman } 2214a23d5eceSKris Buschelman } 2215a23d5eceSKris Buschelman 2216a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2217a23d5eceSKris Buschelman b->bs2 = bs*bs; 2218899cda47SBarry Smith b->mbs = B->rmap.n/bs; 2219899cda47SBarry Smith b->nbs = B->cmap.n/bs; 2220899cda47SBarry Smith b->Mbs = B->rmap.N/bs; 2221899cda47SBarry Smith b->Nbs = B->cmap.N/bs; 2222a23d5eceSKris Buschelman 2223a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2224899cda47SBarry Smith b->rangebs[i] = B->rmap.range[i]/bs; 2225a23d5eceSKris Buschelman } 2226899cda47SBarry Smith b->rstartbs = B->rmap.rstart/bs; 2227899cda47SBarry Smith b->rendbs = B->rmap.rend/bs; 2228899cda47SBarry Smith b->cstartbs = B->cmap.rstart/bs; 2229899cda47SBarry Smith b->cendbs = B->cmap.rend/bs; 2230a23d5eceSKris Buschelman 2231f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2232899cda47SBarry Smith ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 22339c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2234c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 223552e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2236f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2237899cda47SBarry Smith ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 22389c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2239c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 224052e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2241c60e587dSKris Buschelman 2242a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2243a23d5eceSKris Buschelman 2244a23d5eceSKris Buschelman PetscFunctionReturn(0); 2245a23d5eceSKris Buschelman } 2246a23d5eceSKris Buschelman EXTERN_C_END 2247a23d5eceSKris Buschelman 2248a23d5eceSKris Buschelman EXTERN_C_BEGIN 2249be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2250be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 225192b32695SKris Buschelman EXTERN_C_END 22525bf65638SKris Buschelman 22530bad9183SKris Buschelman /*MC 2254fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 22550bad9183SKris Buschelman 22560bad9183SKris Buschelman Options Database Keys: 22578c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 22588c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix 22598c07d4e3SBarry Smith - -mat_use_hash_table <fact> 22600bad9183SKris Buschelman 22610bad9183SKris Buschelman Level: beginner 22620bad9183SKris Buschelman 22630bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 22640bad9183SKris Buschelman M*/ 22650bad9183SKris Buschelman 226692b32695SKris Buschelman EXTERN_C_BEGIN 2267a23d5eceSKris Buschelman #undef __FUNCT__ 22684a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2269be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2270273d9f13SBarry Smith { 2271273d9f13SBarry Smith Mat_MPIBAIJ *b; 2272dfbe8321SBarry Smith PetscErrorCode ierr; 2273273d9f13SBarry Smith PetscTruth flg; 2274273d9f13SBarry Smith 2275273d9f13SBarry Smith PetscFunctionBegin; 227682502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 227782502324SSatish Balay B->data = (void*)b; 227882502324SSatish Balay 2279085a36d4SBarry Smith 2280273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2281273d9f13SBarry Smith B->mapping = 0; 2282273d9f13SBarry Smith B->factor = 0; 2283273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2284273d9f13SBarry Smith 2285273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2286273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2287273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2288273d9f13SBarry Smith 2289273d9f13SBarry Smith /* build local table of row and column ownerships */ 2290899cda47SBarry Smith ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2291273d9f13SBarry Smith 2292273d9f13SBarry Smith /* build cache for off array entries formed */ 2293273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2294273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2295273d9f13SBarry Smith b->colmap = PETSC_NULL; 2296273d9f13SBarry Smith b->garray = PETSC_NULL; 2297273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2298273d9f13SBarry Smith 2299cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2300273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 230164a35ccbSBarry Smith b->setvalueslen = 0; 2302273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2303273d9f13SBarry Smith #endif 2304273d9f13SBarry Smith 2305273d9f13SBarry Smith /* stuff used in block assembly */ 2306273d9f13SBarry Smith b->barray = 0; 2307273d9f13SBarry Smith 2308273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2309273d9f13SBarry Smith b->lvec = 0; 2310273d9f13SBarry Smith b->Mvctx = 0; 2311273d9f13SBarry Smith 2312273d9f13SBarry Smith /* stuff for MatGetRow() */ 2313273d9f13SBarry Smith b->rowindices = 0; 2314273d9f13SBarry Smith b->rowvalues = 0; 2315273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2316273d9f13SBarry Smith 2317273d9f13SBarry Smith /* hash table stuff */ 2318273d9f13SBarry Smith b->ht = 0; 2319273d9f13SBarry Smith b->hd = 0; 2320273d9f13SBarry Smith b->ht_size = 0; 2321273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2322273d9f13SBarry Smith b->ht_fact = 0; 2323273d9f13SBarry Smith b->ht_total_ct = 0; 2324273d9f13SBarry Smith b->ht_insert_ct = 0; 2325273d9f13SBarry Smith 232677925062SSatish Balay ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 23278c07d4e3SBarry Smith ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2328273d9f13SBarry Smith if (flg) { 2329f6275e2eSBarry Smith PetscReal fact = 1.39; 2330273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 23318c07d4e3SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2332273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2333273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2334ae15b995SBarry Smith ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2335273d9f13SBarry Smith } 23368c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 23378c07d4e3SBarry Smith 2338273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2339273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2340273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2341273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2342273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2343273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2344273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2345273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2346273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2347a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2348a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2349a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2350aac34f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2351*44ec7894SLisandro Dalcin "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2352aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 235392b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 235492b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 235592b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 23565bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 23575bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 23585bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 235917667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2360273d9f13SBarry Smith PetscFunctionReturn(0); 2361273d9f13SBarry Smith } 2362273d9f13SBarry Smith EXTERN_C_END 2363273d9f13SBarry Smith 2364209238afSKris Buschelman /*MC 2365002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2366209238afSKris Buschelman 2367209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2368209238afSKris Buschelman and MATMPIBAIJ otherwise. 2369209238afSKris Buschelman 2370209238afSKris Buschelman Options Database Keys: 2371209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2372209238afSKris Buschelman 2373209238afSKris Buschelman Level: beginner 2374209238afSKris Buschelman 2375aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2376209238afSKris Buschelman M*/ 2377209238afSKris Buschelman 2378209238afSKris Buschelman EXTERN_C_BEGIN 2379209238afSKris Buschelman #undef __FUNCT__ 2380209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ" 2381be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2382dfbe8321SBarry Smith { 23836849ba73SBarry Smith PetscErrorCode ierr; 2384b24ad042SBarry Smith PetscMPIInt size; 2385209238afSKris Buschelman 2386209238afSKris Buschelman PetscFunctionBegin; 2387209238afSKris Buschelman ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2388209238afSKris Buschelman if (size == 1) { 2389209238afSKris Buschelman ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2390209238afSKris Buschelman } else { 2391209238afSKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2392209238afSKris Buschelman } 2393209238afSKris Buschelman PetscFunctionReturn(0); 2394209238afSKris Buschelman } 2395209238afSKris Buschelman EXTERN_C_END 2396209238afSKris Buschelman 23974a2ae208SSatish Balay #undef __FUNCT__ 23984a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2399273d9f13SBarry Smith /*@C 2400aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2401273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2402273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2403273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2404273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2405273d9f13SBarry Smith 2406273d9f13SBarry Smith Collective on Mat 2407273d9f13SBarry Smith 2408273d9f13SBarry Smith Input Parameters: 2409273d9f13SBarry Smith + A - the matrix 2410273d9f13SBarry Smith . bs - size of blockk 2411273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2412273d9f13SBarry Smith submatrix (same for all local rows) 2413273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2414273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2415273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2416273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2417273d9f13SBarry Smith submatrix (same for all local rows). 2418273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2419273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2420273d9f13SBarry Smith each block row) or PETSC_NULL. 2421273d9f13SBarry Smith 242249a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 2423273d9f13SBarry Smith 2424273d9f13SBarry Smith Options Database Keys: 24258c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 24268c07d4e3SBarry Smith - -mat_use_hash_table <fact> 2427273d9f13SBarry Smith 2428273d9f13SBarry Smith Notes: 2429273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2430273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2431273d9f13SBarry Smith 2432273d9f13SBarry Smith Storage Information: 2433273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2434273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2435273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2436273d9f13SBarry Smith local matrix (a rectangular submatrix). 2437273d9f13SBarry Smith 2438273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2439273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2440273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2441273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2442273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2443273d9f13SBarry Smith 2444273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2445273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2446273d9f13SBarry Smith 2447273d9f13SBarry Smith .vb 2448273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2449273d9f13SBarry Smith ------------------- 2450273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2451273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2452273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2453273d9f13SBarry Smith ------------------- 2454273d9f13SBarry Smith .ve 2455273d9f13SBarry Smith 2456273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2457273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2458273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2459273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2460273d9f13SBarry Smith 2461273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2462273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2463273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2464273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2465273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2466273d9f13SBarry Smith matrices. 2467273d9f13SBarry Smith 2468273d9f13SBarry Smith Level: intermediate 2469273d9f13SBarry Smith 2470273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2471273d9f13SBarry Smith 2472aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2473273d9f13SBarry Smith @*/ 2474be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2475273d9f13SBarry Smith { 2476b24ad042SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2477273d9f13SBarry Smith 2478273d9f13SBarry Smith PetscFunctionBegin; 2479a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2480a23d5eceSKris Buschelman if (f) { 2481a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2482273d9f13SBarry Smith } 2483273d9f13SBarry Smith PetscFunctionReturn(0); 2484273d9f13SBarry Smith } 2485273d9f13SBarry Smith 24864a2ae208SSatish Balay #undef __FUNCT__ 24874a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 248879bdfe76SSatish Balay /*@C 248979bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 249079bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 249179bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 249279bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 249379bdfe76SSatish Balay performance can be increased by more than a factor of 50. 249479bdfe76SSatish Balay 2495db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2496db81eaa0SLois Curfman McInnes 249779bdfe76SSatish Balay Input Parameters: 2498db81eaa0SLois Curfman McInnes + comm - MPI communicator 249979bdfe76SSatish Balay . bs - size of blockk 250079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 250192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 250292e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 250392e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 250492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 250592e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2506be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2507be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 250847a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 250979bdfe76SSatish Balay submatrix (same for all local rows) 251047a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 251192e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2512db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 251347a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 251479bdfe76SSatish Balay submatrix (same for all local rows). 251547a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 251692e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 251792e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 251879bdfe76SSatish Balay 251979bdfe76SSatish Balay Output Parameter: 252079bdfe76SSatish Balay . A - the matrix 252179bdfe76SSatish Balay 2522db81eaa0SLois Curfman McInnes Options Database Keys: 25238c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 25248c07d4e3SBarry Smith - -mat_use_hash_table <fact> 25253ffaccefSLois Curfman McInnes 2526b259b22eSLois Curfman McInnes Notes: 252749a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 252849a6f317SBarry Smith 252947a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 253047a75d0bSBarry Smith 253179bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 253279bdfe76SSatish Balay (possibly both). 253379bdfe76SSatish Balay 2534be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2535be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2536be79a94dSBarry Smith 253779bdfe76SSatish Balay Storage Information: 253879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 253979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 254079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 254179bdfe76SSatish Balay local matrix (a rectangular submatrix). 254279bdfe76SSatish Balay 254379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 254479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 254579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 254679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 254779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 254879bdfe76SSatish Balay 254979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 255079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 255179bdfe76SSatish Balay 2552db81eaa0SLois Curfman McInnes .vb 2553db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2554db81eaa0SLois Curfman McInnes ------------------- 2555db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2556db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2557db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2558db81eaa0SLois Curfman McInnes ------------------- 2559db81eaa0SLois Curfman McInnes .ve 256079bdfe76SSatish Balay 256179bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 256279bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 256379bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 256457b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 256579bdfe76SSatish Balay 2566d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2567d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 256879bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 256992e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 257092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 25716da5968aSLois Curfman McInnes matrices. 257279bdfe76SSatish Balay 2573027ccd11SLois Curfman McInnes Level: intermediate 2574027ccd11SLois Curfman McInnes 257592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 257679bdfe76SSatish Balay 2577aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 257879bdfe76SSatish Balay @*/ 2579be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 258079bdfe76SSatish Balay { 25816849ba73SBarry Smith PetscErrorCode ierr; 2582b24ad042SBarry Smith PetscMPIInt size; 258379bdfe76SSatish Balay 2584d64ed03dSBarry Smith PetscFunctionBegin; 2585f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2586f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2587d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2588273d9f13SBarry Smith if (size > 1) { 2589273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2590273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2591273d9f13SBarry Smith } else { 2592273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2593273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 25943914022bSBarry Smith } 25953a40ed3dSBarry Smith PetscFunctionReturn(0); 259679bdfe76SSatish Balay } 2597026e39d0SSatish Balay 25984a2ae208SSatish Balay #undef __FUNCT__ 25994a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 26006849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 26010ac07820SSatish Balay { 26020ac07820SSatish Balay Mat mat; 26030ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2604dfbe8321SBarry Smith PetscErrorCode ierr; 2605b24ad042SBarry Smith PetscInt len=0; 26060ac07820SSatish Balay 2607d64ed03dSBarry Smith PetscFunctionBegin; 26080ac07820SSatish Balay *newmat = 0; 2609f69a0ea3SMatthew Knepley ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2610899cda47SBarry Smith ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2611be5d1d56SKris Buschelman ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 26121d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 26137fff6886SHong Zhang 26144beb1cfeSHong Zhang mat->factor = matin->factor; 2615273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 26160ac07820SSatish Balay mat->assembled = PETSC_TRUE; 26177fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 26187fff6886SHong Zhang 2619273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 2620899cda47SBarry Smith mat->rmap.bs = matin->rmap.bs; 26210ac07820SSatish Balay a->bs2 = oldmat->bs2; 26220ac07820SSatish Balay a->mbs = oldmat->mbs; 26230ac07820SSatish Balay a->nbs = oldmat->nbs; 26240ac07820SSatish Balay a->Mbs = oldmat->Mbs; 26250ac07820SSatish Balay a->Nbs = oldmat->Nbs; 26260ac07820SSatish Balay 2627899cda47SBarry Smith ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2628899cda47SBarry Smith ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2629899cda47SBarry Smith 26300ac07820SSatish Balay a->size = oldmat->size; 26310ac07820SSatish Balay a->rank = oldmat->rank; 2632aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2633aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2634aef5e8e0SSatish Balay a->rowindices = 0; 26350ac07820SSatish Balay a->rowvalues = 0; 26360ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 263730793edcSSatish Balay a->barray = 0; 2638899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2639899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2640899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2641899cda47SBarry Smith a->cendbs = oldmat->cendbs; 26420ac07820SSatish Balay 2643133cdb44SSatish Balay /* hash table stuff */ 2644133cdb44SSatish Balay a->ht = 0; 2645133cdb44SSatish Balay a->hd = 0; 2646133cdb44SSatish Balay a->ht_size = 0; 2647133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 264825fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2649133cdb44SSatish Balay a->ht_total_ct = 0; 2650133cdb44SSatish Balay a->ht_insert_ct = 0; 2651133cdb44SSatish Balay 2652899cda47SBarry Smith ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 26538798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2654899cda47SBarry Smith ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 26550ac07820SSatish Balay if (oldmat->colmap) { 2656aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 26570f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 265848e59246SSatish Balay #else 2659b24ad042SBarry Smith ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 266052e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2661b24ad042SBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 266248e59246SSatish Balay #endif 26630ac07820SSatish Balay } else a->colmap = 0; 26644beb1cfeSHong Zhang 26650ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2666b24ad042SBarry Smith ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 266752e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2668b24ad042SBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 26690ac07820SSatish Balay } else a->garray = 0; 26700ac07820SSatish Balay 26710ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 267252e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 26730ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 267452e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 26757fff6886SHong Zhang 26762e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 267752e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 26782e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 267952e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2680b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 26810ac07820SSatish Balay *newmat = mat; 26824beb1cfeSHong Zhang 26833a40ed3dSBarry Smith PetscFunctionReturn(0); 26840ac07820SSatish Balay } 268557b952d6SSatish Balay 2686e090d566SSatish Balay #include "petscsys.h" 268757b952d6SSatish Balay 26884a2ae208SSatish Balay #undef __FUNCT__ 26894a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2690f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 269157b952d6SSatish Balay { 269257b952d6SSatish Balay Mat A; 26936849ba73SBarry Smith PetscErrorCode ierr; 2694b24ad042SBarry Smith int fd; 2695b24ad042SBarry Smith PetscInt i,nz,j,rstart,rend; 269687828ca2SBarry Smith PetscScalar *vals,*buf; 269757b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 269857b952d6SSatish Balay MPI_Status status; 2699b24ad042SBarry Smith PetscMPIInt rank,size,maxnz; 2700b24ad042SBarry Smith PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2701910ba992SMatthew Knepley PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2702167e7480SBarry Smith PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2703dc231df0SBarry Smith PetscMPIInt tag = ((PetscObject)viewer)->tag; 2704910ba992SMatthew Knepley PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2705dc231df0SBarry Smith PetscInt dcount,kmax,k,nzcount,tmp,mend; 270657b952d6SSatish Balay 2707d64ed03dSBarry Smith PetscFunctionBegin; 270877925062SSatish Balay ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 27098c07d4e3SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 27108c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 271157b952d6SSatish Balay 2712d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2713d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 271457b952d6SSatish Balay if (!rank) { 2715b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2716e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2717552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 27186c5fab8fSBarry Smith } 2719d64ed03dSBarry Smith 2720b24ad042SBarry Smith ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 272157b952d6SSatish Balay M = header[1]; N = header[2]; 272257b952d6SSatish Balay 272329bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 272457b952d6SSatish Balay 272557b952d6SSatish Balay /* 272657b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 272757b952d6SSatish Balay divisible by the blocksize 272857b952d6SSatish Balay */ 272957b952d6SSatish Balay Mbs = M/bs; 2730dc231df0SBarry Smith extra_rows = bs - M + bs*Mbs; 273157b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 273257b952d6SSatish Balay else Mbs++; 273357b952d6SSatish Balay if (extra_rows && !rank) { 2734ae15b995SBarry Smith ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 273557b952d6SSatish Balay } 2736537820f0SBarry Smith 273757b952d6SSatish Balay /* determine ownership of all rows */ 273857b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 273957b952d6SSatish Balay m = mbs*bs; 2740dc231df0SBarry Smith ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2741b24ad042SBarry Smith ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2742167e7480SBarry Smith 2743167e7480SBarry Smith /* process 0 needs enough room for process with most rows */ 2744167e7480SBarry Smith if (!rank) { 2745167e7480SBarry Smith mmax = rowners[1]; 2746167e7480SBarry Smith for (i=2; i<size; i++) { 2747167e7480SBarry Smith mmax = PetscMax(mmax,rowners[i]); 2748167e7480SBarry Smith } 2749ca02efcfSSatish Balay mmax*=bs; 2750167e7480SBarry Smith } else mmax = m; 2751167e7480SBarry Smith 275257b952d6SSatish Balay rowners[0] = 0; 2753cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2754cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 275557b952d6SSatish Balay rstart = rowners[rank]; 275657b952d6SSatish Balay rend = rowners[rank+1]; 275757b952d6SSatish Balay 275857b952d6SSatish Balay /* distribute row lengths to all processors */ 275919c38ff2SBarry Smith ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 276057b952d6SSatish Balay if (!rank) { 2761dc231df0SBarry Smith mend = m; 2762dc231df0SBarry Smith if (size == 1) mend = mend - extra_rows; 2763dc231df0SBarry Smith ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2764dc231df0SBarry Smith for (j=mend; j<m; j++) locrowlens[j] = 1; 2765dc231df0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2766b24ad042SBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2767b24ad042SBarry Smith ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2768dc231df0SBarry Smith for (j=0; j<m; j++) { 2769dc231df0SBarry Smith procsnz[0] += locrowlens[j]; 2770dc231df0SBarry Smith } 2771dc231df0SBarry Smith for (i=1; i<size; i++) { 2772dc231df0SBarry Smith mend = browners[i+1] - browners[i]; 2773dc231df0SBarry Smith if (i == size-1) mend = mend - extra_rows; 2774dc231df0SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2775dc231df0SBarry Smith for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2776dc231df0SBarry Smith /* calculate the number of nonzeros on each processor */ 2777dc231df0SBarry Smith for (j=0; j<browners[i+1]-browners[i]; j++) { 277857b952d6SSatish Balay procsnz[i] += rowlengths[j]; 277957b952d6SSatish Balay } 2780dc231df0SBarry Smith ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 278157b952d6SSatish Balay } 2782606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2783dc231df0SBarry Smith } else { 2784dc231df0SBarry Smith ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2785dc231df0SBarry Smith } 278657b952d6SSatish Balay 2787dc231df0SBarry Smith if (!rank) { 278857b952d6SSatish Balay /* determine max buffer needed and allocate it */ 27898a8e0b3aSBarry Smith maxnz = procsnz[0]; 2790cdc0ba36SBarry Smith for (i=1; i<size; i++) { 279157b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 279257b952d6SSatish Balay } 2793b24ad042SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 279457b952d6SSatish Balay 279557b952d6SSatish Balay /* read in my part of the matrix column indices */ 279657b952d6SSatish Balay nz = procsnz[0]; 279719c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 279857b952d6SSatish Balay mycols = ibuf; 2799cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2800e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2801cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2802cee3aa6bSSatish Balay 280357b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 280457b952d6SSatish Balay for (i=1; i<size-1; i++) { 280557b952d6SSatish Balay nz = procsnz[i]; 2806e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2807b24ad042SBarry Smith ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280857b952d6SSatish Balay } 280957b952d6SSatish Balay /* read in the stuff for the last proc */ 281057b952d6SSatish Balay if (size != 1) { 281157b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2812e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 281357b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2814b24ad042SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 281557b952d6SSatish Balay } 2816606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2817d64ed03dSBarry Smith } else { 281857b952d6SSatish Balay /* determine buffer space needed for message */ 281957b952d6SSatish Balay nz = 0; 282057b952d6SSatish Balay for (i=0; i<m; i++) { 282157b952d6SSatish Balay nz += locrowlens[i]; 282257b952d6SSatish Balay } 282319c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 282457b952d6SSatish Balay mycols = ibuf; 282557b952d6SSatish Balay /* receive message of column indices*/ 2826b24ad042SBarry Smith ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2827b24ad042SBarry Smith ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 282829bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 282957b952d6SSatish Balay } 283057b952d6SSatish Balay 283157b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2832dc231df0SBarry Smith ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2833dc231df0SBarry Smith ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2834dc231df0SBarry Smith ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2835dc231df0SBarry Smith ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2836dc231df0SBarry Smith ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 283757b952d6SSatish Balay rowcount = 0; nzcount = 0; 283857b952d6SSatish Balay for (i=0; i<mbs; i++) { 283957b952d6SSatish Balay dcount = 0; 284057b952d6SSatish Balay odcount = 0; 284157b952d6SSatish Balay for (j=0; j<bs; j++) { 284257b952d6SSatish Balay kmax = locrowlens[rowcount]; 284357b952d6SSatish Balay for (k=0; k<kmax; k++) { 284457b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 284557b952d6SSatish Balay if (!mask[tmp]) { 284657b952d6SSatish Balay mask[tmp] = 1; 284757b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 284857b952d6SSatish Balay else masked1[dcount++] = tmp; 284957b952d6SSatish Balay } 285057b952d6SSatish Balay } 285157b952d6SSatish Balay rowcount++; 285257b952d6SSatish Balay } 2853cee3aa6bSSatish Balay 285457b952d6SSatish Balay dlens[i] = dcount; 285557b952d6SSatish Balay odlens[i] = odcount; 2856cee3aa6bSSatish Balay 285757b952d6SSatish Balay /* zero out the mask elements we set */ 285857b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 285957b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 286057b952d6SSatish Balay } 2861cee3aa6bSSatish Balay 286257b952d6SSatish Balay /* create our matrix */ 2863f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2864f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 286578ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 286678ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 286778ae41b4SKris Buschelman 286878ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2869dc231df0SBarry Smith ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 287057b952d6SSatish Balay 287157b952d6SSatish Balay if (!rank) { 287219c38ff2SBarry Smith ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 287357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 287457b952d6SSatish Balay nz = procsnz[0]; 287557b952d6SSatish Balay vals = buf; 2876cee3aa6bSSatish Balay mycols = ibuf; 2877cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2878e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2879cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2880537820f0SBarry Smith 288157b952d6SSatish Balay /* insert into matrix */ 288257b952d6SSatish Balay jj = rstart*bs; 288357b952d6SSatish Balay for (i=0; i<m; i++) { 2884dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 288557b952d6SSatish Balay mycols += locrowlens[i]; 288657b952d6SSatish Balay vals += locrowlens[i]; 288757b952d6SSatish Balay jj++; 288857b952d6SSatish Balay } 288957b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 289057b952d6SSatish Balay for (i=1; i<size-1; i++) { 289157b952d6SSatish Balay nz = procsnz[i]; 289257b952d6SSatish Balay vals = buf; 2893e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2894ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 289557b952d6SSatish Balay } 289657b952d6SSatish Balay /* the last proc */ 289757b952d6SSatish Balay if (size != 1){ 289857b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2899cee3aa6bSSatish Balay vals = buf; 2900e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 290157b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2902ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 290357b952d6SSatish Balay } 2904606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2905d64ed03dSBarry Smith } else { 290657b952d6SSatish Balay /* receive numeric values */ 290719c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 290857b952d6SSatish Balay 290957b952d6SSatish Balay /* receive message of values*/ 291057b952d6SSatish Balay vals = buf; 2911cee3aa6bSSatish Balay mycols = ibuf; 2912ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2913ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 291429bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 291557b952d6SSatish Balay 291657b952d6SSatish Balay /* insert into matrix */ 291757b952d6SSatish Balay jj = rstart*bs; 2918cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2919dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 292057b952d6SSatish Balay mycols += locrowlens[i]; 292157b952d6SSatish Balay vals += locrowlens[i]; 292257b952d6SSatish Balay jj++; 292357b952d6SSatish Balay } 292457b952d6SSatish Balay } 2925606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2926606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2927606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2928dc231df0SBarry Smith ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2929dc231df0SBarry Smith ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2930dc231df0SBarry Smith ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 29316d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29326d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 293378ae41b4SKris Buschelman 293478ae41b4SKris Buschelman *newmat = A; 29353a40ed3dSBarry Smith PetscFunctionReturn(0); 293657b952d6SSatish Balay } 293757b952d6SSatish Balay 29384a2ae208SSatish Balay #undef __FUNCT__ 29394a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2940133cdb44SSatish Balay /*@ 2941133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2942133cdb44SSatish Balay 2943133cdb44SSatish Balay Input Parameters: 2944133cdb44SSatish Balay . mat - the matrix 2945133cdb44SSatish Balay . fact - factor 2946133cdb44SSatish Balay 2947fee21e36SBarry Smith Collective on Mat 2948fee21e36SBarry Smith 29498c890885SBarry Smith Level: advanced 29508c890885SBarry Smith 2951133cdb44SSatish Balay Notes: 29528c07d4e3SBarry Smith This can also be set by the command line option: -mat_use_hash_table <fact> 2953133cdb44SSatish Balay 2954133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2955133cdb44SSatish Balay 2956133cdb44SSatish Balay .seealso: MatSetOption() 2957133cdb44SSatish Balay @*/ 2958be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2959133cdb44SSatish Balay { 2960dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscReal); 29615bf65638SKris Buschelman 29625bf65638SKris Buschelman PetscFunctionBegin; 29635bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 29645bf65638SKris Buschelman if (f) { 29655bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 29665bf65638SKris Buschelman } 29675bf65638SKris Buschelman PetscFunctionReturn(0); 29685bf65638SKris Buschelman } 29695bf65638SKris Buschelman 2970be1d678aSKris Buschelman EXTERN_C_BEGIN 29715bf65638SKris Buschelman #undef __FUNCT__ 29725bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2973be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 29745bf65638SKris Buschelman { 297525fdafccSSatish Balay Mat_MPIBAIJ *baij; 2976133cdb44SSatish Balay 2977133cdb44SSatish Balay PetscFunctionBegin; 2978133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2979133cdb44SSatish Balay baij->ht_fact = fact; 2980133cdb44SSatish Balay PetscFunctionReturn(0); 2981133cdb44SSatish Balay } 2982be1d678aSKris Buschelman EXTERN_C_END 2983f2a5309cSSatish Balay 29844a2ae208SSatish Balay #undef __FUNCT__ 29854a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2986be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2987f2a5309cSSatish Balay { 2988f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2989f2a5309cSSatish Balay PetscFunctionBegin; 2990f2a5309cSSatish Balay *Ad = a->A; 2991f2a5309cSSatish Balay *Ao = a->B; 2992195d93cdSBarry Smith *colmap = a->garray; 2993f2a5309cSSatish Balay PetscFunctionReturn(0); 2994f2a5309cSSatish Balay } 2995