1be1d678aSKris Buschelman #define PETSCMAT_DLL 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 479bdfe76SSatish Balay 5dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 15f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 1693fea6afSBarry Smith 1793fea6afSBarry Smith /* UGLY, ugly, ugly 1887828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 1993fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2093fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2193fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2293fea6afSBarry Smith into the single precision data structures. 2393fea6afSBarry Smith */ 2493fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 3093fea6afSBarry Smith #else 316fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3293fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3393fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 346fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 356fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3693fea6afSBarry Smith #endif 373b2fbd54SBarry Smith 384a2ae208SSatish Balay #undef __FUNCT__ 394a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 417843d17aSBarry Smith { 427843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43dfbe8321SBarry Smith PetscErrorCode ierr; 44b24ad042SBarry Smith PetscInt i; 4587828ca2SBarry Smith PetscScalar *va,*vb; 467843d17aSBarry Smith Vec vtmp; 477843d17aSBarry Smith 487843d17aSBarry Smith PetscFunctionBegin; 497843d17aSBarry Smith 507843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 511ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 527843d17aSBarry Smith 53899cda47SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 547843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 551ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 567843d17aSBarry Smith 57899cda47SBarry Smith for (i=0; i<A->rmap.n; i++){ 58273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 597843d17aSBarry Smith } 607843d17aSBarry Smith 611ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 621ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 647843d17aSBarry Smith 657843d17aSBarry Smith PetscFunctionReturn(0); 667843d17aSBarry Smith } 677843d17aSBarry Smith 687fc3c18eSBarry Smith EXTERN_C_BEGIN 694a2ae208SSatish Balay #undef __FUNCT__ 704a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 727fc3c18eSBarry Smith { 737fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74dfbe8321SBarry Smith PetscErrorCode ierr; 757fc3c18eSBarry Smith 767fc3c18eSBarry Smith PetscFunctionBegin; 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 787fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 797fc3c18eSBarry Smith PetscFunctionReturn(0); 807fc3c18eSBarry Smith } 817fc3c18eSBarry Smith EXTERN_C_END 827fc3c18eSBarry Smith 837fc3c18eSBarry Smith EXTERN_C_BEGIN 844a2ae208SSatish Balay #undef __FUNCT__ 854a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 877fc3c18eSBarry Smith { 887fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89dfbe8321SBarry Smith PetscErrorCode ierr; 907fc3c18eSBarry Smith 917fc3c18eSBarry Smith PetscFunctionBegin; 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 937fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 947fc3c18eSBarry Smith PetscFunctionReturn(0); 957fc3c18eSBarry Smith } 967fc3c18eSBarry Smith EXTERN_C_END 977fc3c18eSBarry Smith 98537820f0SBarry Smith /* 99537820f0SBarry Smith Local utility routine that creates a mapping from the global column 10057b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10157b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10257b952d6SSatish Balay length of colmap equals the global matrix length. 10357b952d6SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 10757b952d6SSatish Balay { 10857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10957b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 1106849ba73SBarry Smith PetscErrorCode ierr; 111899cda47SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 11257b952d6SSatish Balay 113d64ed03dSBarry Smith PetscFunctionBegin; 114aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 115f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11648e59246SSatish Balay for (i=0; i<nbs; i++){ 1170f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11848e59246SSatish Balay } 11948e59246SSatish Balay #else 120b24ad042SBarry Smith ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 12152e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122b24ad042SBarry Smith ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12448e59246SSatish Balay #endif 1253a40ed3dSBarry Smith PetscFunctionReturn(0); 12657b952d6SSatish Balay } 12757b952d6SSatish Balay 12880c1aa95SSatish Balay #define CHUNKSIZE 10 12980c1aa95SSatish Balay 130f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13180c1aa95SSatish Balay { \ 13280c1aa95SSatish Balay \ 13380c1aa95SSatish Balay brow = row/bs; \ 13480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13680c1aa95SSatish Balay bcol = col/bs; \ 13780c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 138ab26458aSBarry Smith low = 0; high = nrow; \ 139ab26458aSBarry Smith while (high-low > 3) { \ 140ab26458aSBarry Smith t = (low+high)/2; \ 141ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 142ab26458aSBarry Smith else low = t; \ 143ab26458aSBarry Smith } \ 144ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14580c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14680c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14780c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 148eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 149eada6651SSatish Balay else *bap = value; \ 150ac7a638eSSatish Balay goto a_noinsert; \ 15180c1aa95SSatish Balay } \ 15280c1aa95SSatish Balay } \ 15389280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 154085a36d4SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155e6b907acSBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \ 15680c1aa95SSatish Balay N = nrow++ - 1; \ 15780c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 15880c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 15980c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1603eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16180c1aa95SSatish Balay } \ 1623eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 16380c1aa95SSatish Balay rp[_i] = bcol; \ 16480c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 165ac7a638eSSatish Balay a_noinsert:; \ 16680c1aa95SSatish Balay ailen[brow] = nrow; \ 16780c1aa95SSatish Balay } 16857b952d6SSatish Balay 169ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 170ac7a638eSSatish Balay { \ 171ac7a638eSSatish Balay brow = row/bs; \ 172ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 174ac7a638eSSatish Balay bcol = col/bs; \ 175ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 176ac7a638eSSatish Balay low = 0; high = nrow; \ 177ac7a638eSSatish Balay while (high-low > 3) { \ 178ac7a638eSSatish Balay t = (low+high)/2; \ 179ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 180ac7a638eSSatish Balay else low = t; \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 183ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 184ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 185ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 186ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 187ac7a638eSSatish Balay else *bap = value; \ 188ac7a638eSSatish Balay goto b_noinsert; \ 189ac7a638eSSatish Balay } \ 190ac7a638eSSatish Balay } \ 19189280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 192085a36d4SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 193e6b907acSBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \ 194085a36d4SBarry Smith CHKMEMQ;\ 195ac7a638eSSatish Balay N = nrow++ - 1; \ 196ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 197ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 198ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 1993eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 200ac7a638eSSatish Balay } \ 2013eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 202ac7a638eSSatish Balay rp[_i] = bcol; \ 203ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 204ac7a638eSSatish Balay b_noinsert:; \ 205ac7a638eSSatish Balay bilen[brow] = nrow; \ 206ac7a638eSSatish Balay } 207ac7a638eSSatish Balay 20893fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2094a2ae208SSatish Balay #undef __FUNCT__ 2104a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 211b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 21257b952d6SSatish Balay { 2136fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 214dfbe8321SBarry Smith PetscErrorCode ierr; 215b24ad042SBarry Smith PetscInt i,N = m*n; 2166fa18ffdSBarry Smith MatScalar *vsingle; 21793fea6afSBarry Smith 21893fea6afSBarry Smith PetscFunctionBegin; 2196fa18ffdSBarry Smith if (N > b->setvalueslen) { 22005b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 22182502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2226fa18ffdSBarry Smith b->setvalueslen = N; 2236fa18ffdSBarry Smith } 2246fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2256fa18ffdSBarry Smith 22693fea6afSBarry Smith for (i=0; i<N; i++) { 22793fea6afSBarry Smith vsingle[i] = v[i]; 22893fea6afSBarry Smith } 22993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 23093fea6afSBarry Smith PetscFunctionReturn(0); 23193fea6afSBarry Smith } 23293fea6afSBarry Smith 2334a2ae208SSatish Balay #undef __FUNCT__ 2344a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 235b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 23693fea6afSBarry Smith { 2376fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 238dfbe8321SBarry Smith PetscErrorCode ierr; 239b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2406fa18ffdSBarry Smith MatScalar *vsingle; 24193fea6afSBarry Smith 24293fea6afSBarry Smith PetscFunctionBegin; 2436fa18ffdSBarry Smith if (N > b->setvalueslen) { 24405b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 24582502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2466fa18ffdSBarry Smith b->setvalueslen = N; 2476fa18ffdSBarry Smith } 2486fa18ffdSBarry Smith vsingle = b->setvaluescopy; 24993fea6afSBarry Smith for (i=0; i<N; i++) { 25093fea6afSBarry Smith vsingle[i] = v[i]; 25193fea6afSBarry Smith } 25293fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 25393fea6afSBarry Smith PetscFunctionReturn(0); 25493fea6afSBarry Smith } 2556fa18ffdSBarry Smith 2564a2ae208SSatish Balay #undef __FUNCT__ 2574a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 258b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2596fa18ffdSBarry Smith { 2606fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261dfbe8321SBarry Smith PetscErrorCode ierr; 262b24ad042SBarry Smith PetscInt i,N = m*n; 2636fa18ffdSBarry Smith MatScalar *vsingle; 2646fa18ffdSBarry Smith 2656fa18ffdSBarry Smith PetscFunctionBegin; 2666fa18ffdSBarry Smith if (N > b->setvalueslen) { 26705b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 26882502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2696fa18ffdSBarry Smith b->setvalueslen = N; 2706fa18ffdSBarry Smith } 2716fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2726fa18ffdSBarry Smith for (i=0; i<N; i++) { 2736fa18ffdSBarry Smith vsingle[i] = v[i]; 2746fa18ffdSBarry Smith } 2756fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 2766fa18ffdSBarry Smith PetscFunctionReturn(0); 2776fa18ffdSBarry Smith } 2786fa18ffdSBarry Smith 2794a2ae208SSatish Balay #undef __FUNCT__ 2804a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 281b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2826fa18ffdSBarry Smith { 2836fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 284dfbe8321SBarry Smith PetscErrorCode ierr; 285b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2866fa18ffdSBarry Smith MatScalar *vsingle; 2876fa18ffdSBarry Smith 2886fa18ffdSBarry Smith PetscFunctionBegin; 2896fa18ffdSBarry Smith if (N > b->setvalueslen) { 29005b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 29182502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2926fa18ffdSBarry Smith b->setvalueslen = N; 2936fa18ffdSBarry Smith } 2946fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2956fa18ffdSBarry Smith for (i=0; i<N; i++) { 2966fa18ffdSBarry Smith vsingle[i] = v[i]; 2976fa18ffdSBarry Smith } 2986fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 2996fa18ffdSBarry Smith PetscFunctionReturn(0); 3006fa18ffdSBarry Smith } 30193fea6afSBarry Smith #endif 30293fea6afSBarry Smith 3034a2ae208SSatish Balay #undef __FUNCT__ 304e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 305b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 30693fea6afSBarry Smith { 30757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 30893fea6afSBarry Smith MatScalar value; 309273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 310dfbe8321SBarry Smith PetscErrorCode ierr; 311b24ad042SBarry Smith PetscInt i,j,row,col; 312899cda47SBarry Smith PetscInt rstart_orig=mat->rmap.rstart; 313899cda47SBarry Smith PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 314899cda47SBarry Smith PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 31557b952d6SSatish Balay 316eada6651SSatish Balay /* Some Variables required in the macro */ 31780c1aa95SSatish Balay Mat A = baij->A; 31880c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 319b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3203eda8832SBarry Smith MatScalar *aa=a->a; 321ac7a638eSSatish Balay 322ac7a638eSSatish Balay Mat B = baij->B; 323ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 324b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3253eda8832SBarry Smith MatScalar *ba=b->a; 326ac7a638eSSatish Balay 327b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 328b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 3293eda8832SBarry Smith MatScalar *ap,*bap; 33080c1aa95SSatish Balay 331d64ed03dSBarry Smith PetscFunctionBegin; 33257b952d6SSatish Balay for (i=0; i<m; i++) { 3335ef9f2a5SBarry Smith if (im[i] < 0) continue; 3342515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 335899cda47SBarry Smith if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 336639f9d9dSBarry Smith #endif 33757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 33857b952d6SSatish Balay row = im[i] - rstart_orig; 33957b952d6SSatish Balay for (j=0; j<n; j++) { 34057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 34157b952d6SSatish Balay col = in[j] - cstart_orig; 34257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 343f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 34480c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 34573959e64SBarry Smith } else if (in[j] < 0) continue; 3462515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 347899cda47SBarry Smith else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 348639f9d9dSBarry Smith #endif 34957b952d6SSatish Balay else { 35057b952d6SSatish Balay if (mat->was_assembled) { 351905e6a2fSBarry Smith if (!baij->colmap) { 352905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 353905e6a2fSBarry Smith } 354aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3550f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 356bba1ac68SSatish Balay col = col - 1; 35748e59246SSatish Balay #else 358bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 35948e59246SSatish Balay #endif 36057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 36157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3628295de27SSatish Balay col = in[j]; 3639bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 3649bf004c3SSatish Balay B = baij->B; 3659bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 3669bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 3679bf004c3SSatish Balay ba=b->a; 368bba1ac68SSatish Balay } else col += in[j]%bs; 3698295de27SSatish Balay } else col = in[j]; 37057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 37190da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 37290da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 37357b952d6SSatish Balay } 37457b952d6SSatish Balay } 375d64ed03dSBarry Smith } else { 37690f02eecSBarry Smith if (!baij->donotstash) { 377ff2fd236SBarry Smith if (roworiented) { 3786fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 379ff2fd236SBarry Smith } else { 3806fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 38157b952d6SSatish Balay } 38257b952d6SSatish Balay } 38357b952d6SSatish Balay } 38490f02eecSBarry Smith } 3853a40ed3dSBarry Smith PetscFunctionReturn(0); 38657b952d6SSatish Balay } 38757b952d6SSatish Balay 3884a2ae208SSatish Balay #undef __FUNCT__ 3894a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 390b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 391ab26458aSBarry Smith { 392ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 393f15d580aSBarry Smith const MatScalar *value; 394f15d580aSBarry Smith MatScalar *barray=baij->barray; 395273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 396dfbe8321SBarry Smith PetscErrorCode ierr; 397899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 398899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 399899cda47SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 400ab26458aSBarry Smith 401b16ae2b1SBarry Smith PetscFunctionBegin; 40230793edcSSatish Balay if(!barray) { 40382502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 40482502324SSatish Balay baij->barray = barray; 40530793edcSSatish Balay } 40630793edcSSatish Balay 407ab26458aSBarry Smith if (roworiented) { 408ab26458aSBarry Smith stepval = (n-1)*bs; 409ab26458aSBarry Smith } else { 410ab26458aSBarry Smith stepval = (m-1)*bs; 411ab26458aSBarry Smith } 412ab26458aSBarry Smith for (i=0; i<m; i++) { 4135ef9f2a5SBarry Smith if (im[i] < 0) continue; 4142515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 41577431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 416ab26458aSBarry Smith #endif 417ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 418ab26458aSBarry Smith row = im[i] - rstart; 419ab26458aSBarry Smith for (j=0; j<n; j++) { 42015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 42115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 422f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 42315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 424f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 42515b57d14SSatish Balay } else { /* Here a copy is required */ 426ab26458aSBarry Smith if (roworiented) { 427ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 428ab26458aSBarry Smith } else { 429ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 430abef11f7SSatish Balay } 43147513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 43247513183SBarry Smith for (jj=0; jj<bs; jj++) { 43330793edcSSatish Balay *barray++ = *value++; 43447513183SBarry Smith } 43547513183SBarry Smith } 43630793edcSSatish Balay barray -=bs2; 43715b57d14SSatish Balay } 438abef11f7SSatish Balay 439abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 440abef11f7SSatish Balay col = in[j] - cstart; 44193fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 442ab26458aSBarry Smith } 4435ef9f2a5SBarry Smith else if (in[j] < 0) continue; 4442515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 44577431f27SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 446ab26458aSBarry Smith #endif 447ab26458aSBarry Smith else { 448ab26458aSBarry Smith if (mat->was_assembled) { 449ab26458aSBarry Smith if (!baij->colmap) { 450ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 451ab26458aSBarry Smith } 452a5eb4965SSatish Balay 4532515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 454aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 455b24ad042SBarry Smith { PetscInt data; 4560f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 45729bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 458fa46199cSSatish Balay } 45948e59246SSatish Balay #else 46029bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461a5eb4965SSatish Balay #endif 46248e59246SSatish Balay #endif 463aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4640f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 465fa46199cSSatish Balay col = (col - 1)/bs; 46648e59246SSatish Balay #else 467a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 46848e59246SSatish Balay #endif 469ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 470ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 471ab26458aSBarry Smith col = in[j]; 472ab26458aSBarry Smith } 473ab26458aSBarry Smith } 474ab26458aSBarry Smith else col = in[j]; 47593fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 476ab26458aSBarry Smith } 477ab26458aSBarry Smith } 478d64ed03dSBarry Smith } else { 479ab26458aSBarry Smith if (!baij->donotstash) { 480ff2fd236SBarry Smith if (roworiented) { 4816fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 482ff2fd236SBarry Smith } else { 4836fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 484ff2fd236SBarry Smith } 485abef11f7SSatish Balay } 486ab26458aSBarry Smith } 487ab26458aSBarry Smith } 4883a40ed3dSBarry Smith PetscFunctionReturn(0); 489ab26458aSBarry Smith } 4906fa18ffdSBarry Smith 4910bdbc534SSatish Balay #define HASH_KEY 0.6180339887 492b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 493b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 494b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 4954a2ae208SSatish Balay #undef __FUNCT__ 4964a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 497b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 4980bdbc534SSatish Balay { 4990bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 500273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 501dfbe8321SBarry Smith PetscErrorCode ierr; 502b24ad042SBarry Smith PetscInt i,j,row,col; 503899cda47SBarry Smith PetscInt rstart_orig=mat->rmap.rstart; 504899cda47SBarry Smith PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 505899cda47SBarry Smith PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 506329f5518SBarry Smith PetscReal tmp; 5073eda8832SBarry Smith MatScalar **HD = baij->hd,value; 5082515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 509b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5104a15367fSSatish Balay #endif 5110bdbc534SSatish Balay 5120bdbc534SSatish Balay PetscFunctionBegin; 5130bdbc534SSatish Balay 5140bdbc534SSatish Balay for (i=0; i<m; i++) { 5152515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 51629bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 517899cda47SBarry Smith if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 5180bdbc534SSatish Balay #endif 5190bdbc534SSatish Balay row = im[i]; 520c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5210bdbc534SSatish Balay for (j=0; j<n; j++) { 5220bdbc534SSatish Balay col = in[j]; 5236fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 524b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 525c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 526c2760754SSatish Balay h1 = HASH(size,key,tmp); 5270bdbc534SSatish Balay 528c2760754SSatish Balay 529c2760754SSatish Balay idx = h1; 5302515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 531187ce0cbSSatish Balay insert_ct++; 532187ce0cbSSatish Balay total_ct++; 533187ce0cbSSatish Balay if (HT[idx] != key) { 534187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 535187ce0cbSSatish Balay if (idx == size) { 536187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 537187ce0cbSSatish Balay if (idx == h1) { 53877431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 539187ce0cbSSatish Balay } 540187ce0cbSSatish Balay } 541187ce0cbSSatish Balay } 542187ce0cbSSatish Balay #else 543c2760754SSatish Balay if (HT[idx] != key) { 544c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 545c2760754SSatish Balay if (idx == size) { 546c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 547c2760754SSatish Balay if (idx == h1) { 54877431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 549c2760754SSatish Balay } 550c2760754SSatish Balay } 551c2760754SSatish Balay } 552187ce0cbSSatish Balay #endif 553c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 554c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 555c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5560bdbc534SSatish Balay } 5570bdbc534SSatish Balay } else { 5580bdbc534SSatish Balay if (!baij->donotstash) { 559ff2fd236SBarry Smith if (roworiented) { 5608798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 561ff2fd236SBarry Smith } else { 5628798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5630bdbc534SSatish Balay } 5640bdbc534SSatish Balay } 5650bdbc534SSatish Balay } 5660bdbc534SSatish Balay } 5672515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 568187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 569187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 570187ce0cbSSatish Balay #endif 5710bdbc534SSatish Balay PetscFunctionReturn(0); 5720bdbc534SSatish Balay } 5730bdbc534SSatish Balay 5744a2ae208SSatish Balay #undef __FUNCT__ 5754a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 576b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 5770bdbc534SSatish Balay { 5780bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 579273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 580dfbe8321SBarry Smith PetscErrorCode ierr; 581b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col; 582899cda47SBarry Smith PetscInt rstart=baij->rstartbs; 583899cda47SBarry Smith PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 584b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 585329f5518SBarry Smith PetscReal tmp; 5863eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 587f15d580aSBarry Smith const MatScalar *v_t,*value; 5882515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 589b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5904a15367fSSatish Balay #endif 5910bdbc534SSatish Balay 592d0a41580SSatish Balay PetscFunctionBegin; 593d0a41580SSatish Balay 5940bdbc534SSatish Balay if (roworiented) { 5950bdbc534SSatish Balay stepval = (n-1)*bs; 5960bdbc534SSatish Balay } else { 5970bdbc534SSatish Balay stepval = (m-1)*bs; 5980bdbc534SSatish Balay } 5990bdbc534SSatish Balay for (i=0; i<m; i++) { 6002515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 60177431f27SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 60277431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 6030bdbc534SSatish Balay #endif 6040bdbc534SSatish Balay row = im[i]; 605187ce0cbSSatish Balay v_t = v + i*bs2; 606c2760754SSatish Balay if (row >= rstart && row < rend) { 6070bdbc534SSatish Balay for (j=0; j<n; j++) { 6080bdbc534SSatish Balay col = in[j]; 6090bdbc534SSatish Balay 6100bdbc534SSatish Balay /* Look up into the Hash Table */ 611c2760754SSatish Balay key = row*Nbs+col+1; 612c2760754SSatish Balay h1 = HASH(size,key,tmp); 6130bdbc534SSatish Balay 614c2760754SSatish Balay idx = h1; 6152515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 616187ce0cbSSatish Balay total_ct++; 617187ce0cbSSatish Balay insert_ct++; 618187ce0cbSSatish Balay if (HT[idx] != key) { 619187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 620187ce0cbSSatish Balay if (idx == size) { 621187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 622187ce0cbSSatish Balay if (idx == h1) { 62377431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 624187ce0cbSSatish Balay } 625187ce0cbSSatish Balay } 626187ce0cbSSatish Balay } 627187ce0cbSSatish Balay #else 628c2760754SSatish Balay if (HT[idx] != key) { 629c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 630c2760754SSatish Balay if (idx == size) { 631c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 632c2760754SSatish Balay if (idx == h1) { 63377431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 634c2760754SSatish Balay } 635c2760754SSatish Balay } 636c2760754SSatish Balay } 637187ce0cbSSatish Balay #endif 638c2760754SSatish Balay baij_a = HD[idx]; 6390bdbc534SSatish Balay if (roworiented) { 640c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 641187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 642187ce0cbSSatish Balay value = v_t; 643187ce0cbSSatish Balay v_t += bs; 644fef45726SSatish Balay if (addv == ADD_VALUES) { 645c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 646c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 647fef45726SSatish Balay baij_a[jj] += *value++; 648b4cc0f5aSSatish Balay } 649b4cc0f5aSSatish Balay } 650fef45726SSatish Balay } else { 651c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 652c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 653fef45726SSatish Balay baij_a[jj] = *value++; 654fef45726SSatish Balay } 655fef45726SSatish Balay } 656fef45726SSatish Balay } 6570bdbc534SSatish Balay } else { 6580bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 659fef45726SSatish Balay if (addv == ADD_VALUES) { 660b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6610bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 662fef45726SSatish Balay baij_a[jj] += *value++; 663fef45726SSatish Balay } 664fef45726SSatish Balay } 665fef45726SSatish Balay } else { 666fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 667fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 668fef45726SSatish Balay baij_a[jj] = *value++; 669fef45726SSatish Balay } 670b4cc0f5aSSatish Balay } 6710bdbc534SSatish Balay } 6720bdbc534SSatish Balay } 6730bdbc534SSatish Balay } 6740bdbc534SSatish Balay } else { 6750bdbc534SSatish Balay if (!baij->donotstash) { 6760bdbc534SSatish Balay if (roworiented) { 6778798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6780bdbc534SSatish Balay } else { 6798798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6800bdbc534SSatish Balay } 6810bdbc534SSatish Balay } 6820bdbc534SSatish Balay } 6830bdbc534SSatish Balay } 6842515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 685187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 686187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 687187ce0cbSSatish Balay #endif 6880bdbc534SSatish Balay PetscFunctionReturn(0); 6890bdbc534SSatish Balay } 690133cdb44SSatish Balay 6914a2ae208SSatish Balay #undef __FUNCT__ 6924a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 693b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 694d6de1c52SSatish Balay { 695d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6966849ba73SBarry Smith PetscErrorCode ierr; 697899cda47SBarry Smith PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 698899cda47SBarry Smith PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 699d6de1c52SSatish Balay 700133cdb44SSatish Balay PetscFunctionBegin; 701d6de1c52SSatish Balay for (i=0; i<m; i++) { 70277431f27SBarry Smith if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 703899cda47SBarry Smith if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 704d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 705d6de1c52SSatish Balay row = idxm[i] - bsrstart; 706d6de1c52SSatish Balay for (j=0; j<n; j++) { 70777431f27SBarry Smith if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 708899cda47SBarry Smith if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 709d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 710d6de1c52SSatish Balay col = idxn[j] - bscstart; 71198dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 712d64ed03dSBarry Smith } else { 713905e6a2fSBarry Smith if (!baij->colmap) { 714905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 715905e6a2fSBarry Smith } 716aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7170f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 718fa46199cSSatish Balay data --; 71948e59246SSatish Balay #else 72048e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 72148e59246SSatish Balay #endif 72248e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 723d9d09a02SSatish Balay else { 72448e59246SSatish Balay col = data + idxn[j]%bs; 72598dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 726d6de1c52SSatish Balay } 727d6de1c52SSatish Balay } 728d6de1c52SSatish Balay } 729d64ed03dSBarry Smith } else { 73029bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 731d6de1c52SSatish Balay } 732d6de1c52SSatish Balay } 7333a40ed3dSBarry Smith PetscFunctionReturn(0); 734d6de1c52SSatish Balay } 735d6de1c52SSatish Balay 7364a2ae208SSatish Balay #undef __FUNCT__ 7374a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 738dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 739d6de1c52SSatish Balay { 740d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 741d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 742dfbe8321SBarry Smith PetscErrorCode ierr; 743899cda47SBarry Smith PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 744329f5518SBarry Smith PetscReal sum = 0.0; 7453eda8832SBarry Smith MatScalar *v; 746d6de1c52SSatish Balay 747d64ed03dSBarry Smith PetscFunctionBegin; 748d6de1c52SSatish Balay if (baij->size == 1) { 749064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 750d6de1c52SSatish Balay } else { 751d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 752d6de1c52SSatish Balay v = amat->a; 7538a62d963SHong Zhang nz = amat->nz*bs2; 7548a62d963SHong Zhang for (i=0; i<nz; i++) { 755aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 756329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 757d6de1c52SSatish Balay #else 758d6de1c52SSatish Balay sum += (*v)*(*v); v++; 759d6de1c52SSatish Balay #endif 760d6de1c52SSatish Balay } 761d6de1c52SSatish Balay v = bmat->a; 7628a62d963SHong Zhang nz = bmat->nz*bs2; 7638a62d963SHong Zhang for (i=0; i<nz; i++) { 764aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 765329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 766d6de1c52SSatish Balay #else 767d6de1c52SSatish Balay sum += (*v)*(*v); v++; 768d6de1c52SSatish Balay #endif 769d6de1c52SSatish Balay } 770064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 771064f8208SBarry Smith *nrm = sqrt(*nrm); 7728a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 7738a62d963SHong Zhang PetscReal *tmp,*tmp2; 774899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 775899cda47SBarry Smith ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 776899cda47SBarry Smith tmp2 = tmp + mat->cmap.N; 777899cda47SBarry Smith ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 7788a62d963SHong Zhang v = amat->a; jj = amat->j; 7798a62d963SHong Zhang for (i=0; i<amat->nz; i++) { 7808a62d963SHong Zhang for (j=0; j<bs; j++){ 7818a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 7828a62d963SHong Zhang for (row=0; row<bs; row++){ 7838a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7848a62d963SHong Zhang } 7858a62d963SHong Zhang } 7868a62d963SHong Zhang jj++; 7878a62d963SHong Zhang } 7888a62d963SHong Zhang v = bmat->a; jj = bmat->j; 7898a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 7908a62d963SHong Zhang for (j=0; j<bs; j++){ 7918a62d963SHong Zhang col = bs*garray[*jj] + j; 7928a62d963SHong Zhang for (row=0; row<bs; row++){ 7938a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7948a62d963SHong Zhang } 7958a62d963SHong Zhang } 7968a62d963SHong Zhang jj++; 7978a62d963SHong Zhang } 798899cda47SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 7998a62d963SHong Zhang *nrm = 0.0; 800899cda47SBarry Smith for (j=0; j<mat->cmap.N; j++) { 8018a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8028a62d963SHong Zhang } 8038a62d963SHong Zhang ierr = PetscFree(tmp);CHKERRQ(ierr); 8048a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 805577dd1f9SKris Buschelman PetscReal *sums; 806577dd1f9SKris Buschelman ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 8078a62d963SHong Zhang sum = 0.0; 8088a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 8098a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 8108a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 8118a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 8128a62d963SHong Zhang for (i=0; i<nz; i++) { 8138a62d963SHong Zhang for (col=0; col<bs; col++){ 8148a62d963SHong Zhang for (row=0; row<bs; row++){ 8158a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8168a62d963SHong Zhang } 8178a62d963SHong Zhang } 8188a62d963SHong Zhang } 8198a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 8208a62d963SHong Zhang nz = bmat->i[j+1]-bmat->i[j]; 8218a62d963SHong Zhang for (i=0; i<nz; i++) { 8228a62d963SHong Zhang for (col=0; col<bs; col++){ 8238a62d963SHong Zhang for (row=0; row<bs; row++){ 8248a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8258a62d963SHong Zhang } 8268a62d963SHong Zhang } 8278a62d963SHong Zhang } 8288a62d963SHong Zhang for (row=0; row<bs; row++){ 8298a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 8308a62d963SHong Zhang } 8318a62d963SHong Zhang } 8328a62d963SHong Zhang ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 833577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 834d64ed03dSBarry Smith } else { 83529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836d6de1c52SSatish Balay } 837d64ed03dSBarry Smith } 8383a40ed3dSBarry Smith PetscFunctionReturn(0); 839d6de1c52SSatish Balay } 84057b952d6SSatish Balay 841fef45726SSatish Balay /* 842fef45726SSatish Balay Creates the hash table, and sets the table 843fef45726SSatish Balay This table is created only once. 844fef45726SSatish Balay If new entried need to be added to the matrix 845fef45726SSatish Balay then the hash table has to be destroyed and 846fef45726SSatish Balay recreated. 847fef45726SSatish Balay */ 8484a2ae208SSatish Balay #undef __FUNCT__ 8494a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851596b8d2eSBarry Smith { 852596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 854596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8566849ba73SBarry Smith PetscErrorCode ierr; 857899cda47SBarry Smith PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 858899cda47SBarry Smith PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859b24ad042SBarry Smith PetscInt *HT,key; 8603eda8832SBarry Smith MatScalar **HD; 861329f5518SBarry Smith PetscReal tmp; 8626cf91177SBarry Smith #if defined(PETSC_USE_INFO) 863b24ad042SBarry Smith PetscInt ct=0,max=0; 8644a15367fSSatish Balay #endif 865fef45726SSatish Balay 866d64ed03dSBarry Smith PetscFunctionBegin; 867b24ad042SBarry Smith baij->ht_size=(PetscInt)(factor*nz); 8680bdbc534SSatish Balay size = baij->ht_size; 869fef45726SSatish Balay 8700bdbc534SSatish Balay if (baij->ht) { 8710bdbc534SSatish Balay PetscFunctionReturn(0); 872596b8d2eSBarry Smith } 8730bdbc534SSatish Balay 874fef45726SSatish Balay /* Allocate Memory for Hash Table */ 875b24ad042SBarry Smith ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876b24ad042SBarry Smith baij->ht = (PetscInt*)(baij->hd + size); 877b9e4cc15SSatish Balay HD = baij->hd; 878a07cd24cSSatish Balay HT = baij->ht; 879b9e4cc15SSatish Balay 880b9e4cc15SSatish Balay 881b24ad042SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8820bdbc534SSatish Balay 883596b8d2eSBarry Smith 884596b8d2eSBarry Smith /* Loop Over A */ 8850bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 886596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8870bdbc534SSatish Balay row = i+rstart; 8880bdbc534SSatish Balay col = aj[j]+cstart; 889596b8d2eSBarry Smith 890187ce0cbSSatish Balay key = row*Nbs + col + 1; 891c2760754SSatish Balay h1 = HASH(size,key,tmp); 8920bdbc534SSatish Balay for (k=0; k<size; k++){ 893958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 8940bdbc534SSatish Balay HT[(h1+k)%size] = key; 8950bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 896596b8d2eSBarry Smith break; 8976cf91177SBarry Smith #if defined(PETSC_USE_INFO) 898187ce0cbSSatish Balay } else { 899187ce0cbSSatish Balay ct++; 900187ce0cbSSatish Balay #endif 901596b8d2eSBarry Smith } 902187ce0cbSSatish Balay } 9036cf91177SBarry Smith #if defined(PETSC_USE_INFO) 904187ce0cbSSatish Balay if (k> max) max = k; 905187ce0cbSSatish Balay #endif 906596b8d2eSBarry Smith } 907596b8d2eSBarry Smith } 908596b8d2eSBarry Smith /* Loop Over B */ 9090bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 910596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9110bdbc534SSatish Balay row = i+rstart; 9120bdbc534SSatish Balay col = garray[bj[j]]; 913187ce0cbSSatish Balay key = row*Nbs + col + 1; 914c2760754SSatish Balay h1 = HASH(size,key,tmp); 9150bdbc534SSatish Balay for (k=0; k<size; k++){ 916958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9170bdbc534SSatish Balay HT[(h1+k)%size] = key; 9180bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 919596b8d2eSBarry Smith break; 9206cf91177SBarry Smith #if defined(PETSC_USE_INFO) 921187ce0cbSSatish Balay } else { 922187ce0cbSSatish Balay ct++; 923187ce0cbSSatish Balay #endif 924596b8d2eSBarry Smith } 925187ce0cbSSatish Balay } 9266cf91177SBarry Smith #if defined(PETSC_USE_INFO) 927187ce0cbSSatish Balay if (k> max) max = k; 928187ce0cbSSatish Balay #endif 929596b8d2eSBarry Smith } 930596b8d2eSBarry Smith } 931596b8d2eSBarry Smith 932596b8d2eSBarry Smith /* Print Summary */ 9336cf91177SBarry Smith #if defined(PETSC_USE_INFO) 934c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 935596b8d2eSBarry Smith if (HT[i]) {j++;} 936c38d4ed2SBarry Smith } 937ae15b995SBarry Smith ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 938187ce0cbSSatish Balay #endif 9393a40ed3dSBarry Smith PetscFunctionReturn(0); 940596b8d2eSBarry Smith } 94157b952d6SSatish Balay 9424a2ae208SSatish Balay #undef __FUNCT__ 9434a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945bbb85fb3SSatish Balay { 946bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947dfbe8321SBarry Smith PetscErrorCode ierr; 948b24ad042SBarry Smith PetscInt nstash,reallocs; 949bbb85fb3SSatish Balay InsertMode addv; 950bbb85fb3SSatish Balay 951bbb85fb3SSatish Balay PetscFunctionBegin; 952bbb85fb3SSatish Balay if (baij->donotstash) { 953bbb85fb3SSatish Balay PetscFunctionReturn(0); 954bbb85fb3SSatish Balay } 955bbb85fb3SSatish Balay 956bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 957bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 958bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 95929bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 960bbb85fb3SSatish Balay } 961bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 962bbb85fb3SSatish Balay 963899cda47SBarry Smith ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 964899cda47SBarry Smith ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 9658798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 966ae15b995SBarry Smith ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 96746680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 968ae15b995SBarry Smith ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 969bbb85fb3SSatish Balay PetscFunctionReturn(0); 970bbb85fb3SSatish Balay } 971bbb85fb3SSatish Balay 9724a2ae208SSatish Balay #undef __FUNCT__ 9734a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 974dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 975bbb85fb3SSatish Balay { 976bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 97791c97fd4SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 9786849ba73SBarry Smith PetscErrorCode ierr; 979b24ad042SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 980b24ad042SBarry Smith PetscInt *row,*col,other_disassembled; 9817c922b88SBarry Smith PetscTruth r1,r2,r3; 9823eda8832SBarry Smith MatScalar *val; 983bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 984b24ad042SBarry Smith PetscMPIInt n; 985bbb85fb3SSatish Balay 98691c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 987bbb85fb3SSatish Balay PetscFunctionBegin; 988bbb85fb3SSatish Balay if (!baij->donotstash) { 989a2d1c673SSatish Balay while (1) { 9908798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 991a2d1c673SSatish Balay if (!flg) break; 992a2d1c673SSatish Balay 993bbb85fb3SSatish Balay for (i=0; i<n;) { 994bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 995bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 996bbb85fb3SSatish Balay if (j < n) ncols = j-i; 997bbb85fb3SSatish Balay else ncols = n-i; 998bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 99993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1000bbb85fb3SSatish Balay i = j; 1001bbb85fb3SSatish Balay } 1002bbb85fb3SSatish Balay } 10038798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1004a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1005a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1006a2d1c673SSatish Balay restore the original flags */ 1007a2d1c673SSatish Balay r1 = baij->roworiented; 1008a2d1c673SSatish Balay r2 = a->roworiented; 100991c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 10107c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10117c922b88SBarry Smith a->roworiented = PETSC_FALSE; 101291c97fd4SSatish Balay (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1013a2d1c673SSatish Balay while (1) { 10148798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1015a2d1c673SSatish Balay if (!flg) break; 1016a2d1c673SSatish Balay 1017a2d1c673SSatish Balay for (i=0; i<n;) { 1018a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1019a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1020a2d1c673SSatish Balay if (j < n) ncols = j-i; 1021a2d1c673SSatish Balay else ncols = n-i; 102293fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1023a2d1c673SSatish Balay i = j; 1024a2d1c673SSatish Balay } 1025a2d1c673SSatish Balay } 10268798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1027a2d1c673SSatish Balay baij->roworiented = r1; 1028a2d1c673SSatish Balay a->roworiented = r2; 102991c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1030bbb85fb3SSatish Balay } 1031bbb85fb3SSatish Balay 1032bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1033bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1034bbb85fb3SSatish Balay 1035bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1036bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1037bbb85fb3SSatish Balay /* 1038bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1039bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1040bbb85fb3SSatish Balay */ 1041bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1042bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1043bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1044bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1045bbb85fb3SSatish Balay } 1046bbb85fb3SSatish Balay } 1047bbb85fb3SSatish Balay 1048bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1049bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1050bbb85fb3SSatish Balay } 105191c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1052bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1053bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1054bbb85fb3SSatish Balay 10556cf91177SBarry Smith #if defined(PETSC_USE_INFO) 1056bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1057ae15b995SBarry Smith ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1058bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1059bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1060bbb85fb3SSatish Balay } 1061bbb85fb3SSatish Balay #endif 1062bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1063bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1064bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1065bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1066bbb85fb3SSatish Balay } 1067bbb85fb3SSatish Balay 1068606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1069606d414cSSatish Balay baij->rowvalues = 0; 1070bbb85fb3SSatish Balay PetscFunctionReturn(0); 1071bbb85fb3SSatish Balay } 107257b952d6SSatish Balay 10734a2ae208SSatish Balay #undef __FUNCT__ 10744a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 10756849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 107657b952d6SSatish Balay { 107757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1078dfbe8321SBarry Smith PetscErrorCode ierr; 1079b24ad042SBarry Smith PetscMPIInt size = baij->size,rank = baij->rank; 1080899cda47SBarry Smith PetscInt bs = mat->rmap.bs; 108132077d6dSBarry Smith PetscTruth iascii,isdraw; 1082b0a32e0cSBarry Smith PetscViewer sviewer; 1083f3ef73ceSBarry Smith PetscViewerFormat format; 108457b952d6SSatish Balay 1085d64ed03dSBarry Smith PetscFunctionBegin; 1086f81bd7ccSHong Zhang /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 108732077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1088fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 108932077d6dSBarry Smith if (iascii) { 1090b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1091456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 10924e220ebcSLois Curfman McInnes MatInfo info; 1093d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1094d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 109577431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1096899cda47SBarry Smith rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1097899cda47SBarry Smith mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1098d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 109977431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1100d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 110177431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1102b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 110357b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 11043a40ed3dSBarry Smith PetscFunctionReturn(0); 1105fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 110677431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 11073a40ed3dSBarry Smith PetscFunctionReturn(0); 110804929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 110904929863SHong Zhang PetscFunctionReturn(0); 111057b952d6SSatish Balay } 111157b952d6SSatish Balay } 111257b952d6SSatish Balay 11130f5bd95cSBarry Smith if (isdraw) { 1114b0a32e0cSBarry Smith PetscDraw draw; 111557b952d6SSatish Balay PetscTruth isnull; 1116b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1117b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 111857b952d6SSatish Balay } 111957b952d6SSatish Balay 112057b952d6SSatish Balay if (size == 1) { 1121873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 112257b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1123d64ed03dSBarry Smith } else { 112457b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 112557b952d6SSatish Balay Mat A; 112657b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1127899cda47SBarry Smith PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11283eda8832SBarry Smith MatScalar *a; 112957b952d6SSatish Balay 1130f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1131f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 1132f69a0ea3SMatthew Knepley ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 113357b952d6SSatish Balay if (!rank) { 1134f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1135d64ed03dSBarry Smith } else { 1136f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 113757b952d6SSatish Balay } 1138f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1139899cda47SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 114052e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 114157b952d6SSatish Balay 114257b952d6SSatish Balay /* copy over the A part */ 114357b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 114457b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1145b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 114657b952d6SSatish Balay 114757b952d6SSatish Balay for (i=0; i<mbs; i++) { 1148899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 114957b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 115057b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1151899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 115257b952d6SSatish Balay for (k=0; k<bs; k++) { 115393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1154cee3aa6bSSatish Balay col++; a += bs; 115557b952d6SSatish Balay } 115657b952d6SSatish Balay } 115757b952d6SSatish Balay } 115857b952d6SSatish Balay /* copy over the B part */ 115957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 116057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 116157b952d6SSatish Balay for (i=0; i<mbs; i++) { 1162899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 116357b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 116457b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 116557b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 116657b952d6SSatish Balay for (k=0; k<bs; k++) { 116793fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1168cee3aa6bSSatish Balay col++; a += bs; 116957b952d6SSatish Balay } 117057b952d6SSatish Balay } 117157b952d6SSatish Balay } 1172606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11736d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11746d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 117555843e3eSBarry Smith /* 117655843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1177b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 117855843e3eSBarry Smith */ 1179b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1180f14a1c24SBarry Smith if (!rank) { 1181e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11826831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 118357b952d6SSatish Balay } 1184b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 118557b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 118657b952d6SSatish Balay } 11873a40ed3dSBarry Smith PetscFunctionReturn(0); 118857b952d6SSatish Balay } 118957b952d6SSatish Balay 11904a2ae208SSatish Balay #undef __FUNCT__ 11914a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1192dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 119357b952d6SSatish Balay { 1194dfbe8321SBarry Smith PetscErrorCode ierr; 119532077d6dSBarry Smith PetscTruth iascii,isdraw,issocket,isbinary; 119657b952d6SSatish Balay 1197d64ed03dSBarry Smith PetscFunctionBegin; 119832077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1199fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1200b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1201fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 120232077d6dSBarry Smith if (iascii || isdraw || issocket || isbinary) { 12037b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 12045cd90555SBarry Smith } else { 120579a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 120657b952d6SSatish Balay } 12073a40ed3dSBarry Smith PetscFunctionReturn(0); 120857b952d6SSatish Balay } 120957b952d6SSatish Balay 12104a2ae208SSatish Balay #undef __FUNCT__ 12114a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1212dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 121379bdfe76SSatish Balay { 121479bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1215dfbe8321SBarry Smith PetscErrorCode ierr; 121679bdfe76SSatish Balay 1217d64ed03dSBarry Smith PetscFunctionBegin; 1218aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1219899cda47SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 122079bdfe76SSatish Balay #endif 12218798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12228798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 122379bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 122479bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1225aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12260f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 122748e59246SSatish Balay #else 122805b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 122948e59246SSatish Balay #endif 123005b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1231606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1232606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 123305b42c5fSBarry Smith ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 123405b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 123505b42c5fSBarry Smith ierr = PetscFree(baij->hd);CHKERRQ(ierr); 12366fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 123705b42c5fSBarry Smith ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 12386fa18ffdSBarry Smith #endif 1239899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1240606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1241901853e0SKris Buschelman 1242901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1243901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1244901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1245901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1246aac34f13SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1247901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1248901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 12493a40ed3dSBarry Smith PetscFunctionReturn(0); 125079bdfe76SSatish Balay } 125179bdfe76SSatish Balay 12524a2ae208SSatish Balay #undef __FUNCT__ 12534a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1254dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1255cee3aa6bSSatish Balay { 1256cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1257dfbe8321SBarry Smith PetscErrorCode ierr; 1258b24ad042SBarry Smith PetscInt nt; 1259cee3aa6bSSatish Balay 1260d64ed03dSBarry Smith PetscFunctionBegin; 1261e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1262899cda47SBarry Smith if (nt != A->cmap.n) { 126329bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 126447b4a8eaSLois Curfman McInnes } 1265e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1266899cda47SBarry Smith if (nt != A->rmap.n) { 126729bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 126847b4a8eaSLois Curfman McInnes } 126943a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1270f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 127143a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1272f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 12733a40ed3dSBarry Smith PetscFunctionReturn(0); 1274cee3aa6bSSatish Balay } 1275cee3aa6bSSatish Balay 12764a2ae208SSatish Balay #undef __FUNCT__ 12774a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1278dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1279cee3aa6bSSatish Balay { 1280cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1281dfbe8321SBarry Smith PetscErrorCode ierr; 1282d64ed03dSBarry Smith 1283d64ed03dSBarry Smith PetscFunctionBegin; 128443a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1285f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 128643a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1287f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12883a40ed3dSBarry Smith PetscFunctionReturn(0); 1289cee3aa6bSSatish Balay } 1290cee3aa6bSSatish Balay 12914a2ae208SSatish Balay #undef __FUNCT__ 12924a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1293dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1294cee3aa6bSSatish Balay { 1295cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1296dfbe8321SBarry Smith PetscErrorCode ierr; 1297a5ff213dSBarry Smith PetscTruth merged; 1298cee3aa6bSSatish Balay 1299d64ed03dSBarry Smith PetscFunctionBegin; 1300a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1301cee3aa6bSSatish Balay /* do nondiagonal part */ 13027c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1303a5ff213dSBarry Smith if (!merged) { 1304cee3aa6bSSatish Balay /* send it on its way */ 1305537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1306cee3aa6bSSatish Balay /* do local part */ 13077c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1308cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1309a5ff213dSBarry Smith /* inserted in yy until the next line */ 1310639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1311a5ff213dSBarry Smith } else { 1312a5ff213dSBarry Smith /* do local part */ 1313a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1314a5ff213dSBarry Smith /* send it on its way */ 1315a5ff213dSBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1316a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 1317a5ff213dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1318a5ff213dSBarry Smith } 13193a40ed3dSBarry Smith PetscFunctionReturn(0); 1320cee3aa6bSSatish Balay } 1321cee3aa6bSSatish Balay 13224a2ae208SSatish Balay #undef __FUNCT__ 13234a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1324dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1325cee3aa6bSSatish Balay { 1326cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1327dfbe8321SBarry Smith PetscErrorCode ierr; 1328cee3aa6bSSatish Balay 1329d64ed03dSBarry Smith PetscFunctionBegin; 1330cee3aa6bSSatish Balay /* do nondiagonal part */ 13317c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1332cee3aa6bSSatish Balay /* send it on its way */ 1333537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1334cee3aa6bSSatish Balay /* do local part */ 13357c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1336cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1337cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1338cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1339537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13403a40ed3dSBarry Smith PetscFunctionReturn(0); 1341cee3aa6bSSatish Balay } 1342cee3aa6bSSatish Balay 1343cee3aa6bSSatish Balay /* 1344cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1345cee3aa6bSSatish Balay diagonal block 1346cee3aa6bSSatish Balay */ 13474a2ae208SSatish Balay #undef __FUNCT__ 13484a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1349dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1350cee3aa6bSSatish Balay { 1351cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1352dfbe8321SBarry Smith PetscErrorCode ierr; 1353d64ed03dSBarry Smith 1354d64ed03dSBarry Smith PetscFunctionBegin; 1355899cda47SBarry Smith if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13563a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13573a40ed3dSBarry Smith PetscFunctionReturn(0); 1358cee3aa6bSSatish Balay } 1359cee3aa6bSSatish Balay 13604a2ae208SSatish Balay #undef __FUNCT__ 13614a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1362f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1363cee3aa6bSSatish Balay { 1364cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1365dfbe8321SBarry Smith PetscErrorCode ierr; 1366d64ed03dSBarry Smith 1367d64ed03dSBarry Smith PetscFunctionBegin; 1368f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1369f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 13703a40ed3dSBarry Smith PetscFunctionReturn(0); 1371cee3aa6bSSatish Balay } 1372026e39d0SSatish Balay 13734a2ae208SSatish Balay #undef __FUNCT__ 13744a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1375b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1376acdf5bf4SSatish Balay { 1377acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 137887828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13796849ba73SBarry Smith PetscErrorCode ierr; 1380899cda47SBarry Smith PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1381899cda47SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1382899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1383acdf5bf4SSatish Balay 1384d64ed03dSBarry Smith PetscFunctionBegin; 1385abc0a331SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1386acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1387acdf5bf4SSatish Balay 1388acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1389acdf5bf4SSatish Balay /* 1390acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1391acdf5bf4SSatish Balay */ 1392acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1393b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1394bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1395acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1396acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1397acdf5bf4SSatish Balay } 1398b24ad042SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1399b24ad042SBarry Smith mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1400acdf5bf4SSatish Balay } 1401acdf5bf4SSatish Balay 140229bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1403d9d09a02SSatish Balay lrow = row - brstart; 1404acdf5bf4SSatish Balay 1405acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1406acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1407acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1408f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1409f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1410acdf5bf4SSatish Balay nztot = nzA + nzB; 1411acdf5bf4SSatish Balay 1412acdf5bf4SSatish Balay cmap = mat->garray; 1413acdf5bf4SSatish Balay if (v || idx) { 1414acdf5bf4SSatish Balay if (nztot) { 1415acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1416b24ad042SBarry Smith PetscInt imark = -1; 1417acdf5bf4SSatish Balay if (v) { 1418acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1419acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1420d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1421acdf5bf4SSatish Balay else break; 1422acdf5bf4SSatish Balay } 1423acdf5bf4SSatish Balay imark = i; 1424acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1425acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1426acdf5bf4SSatish Balay } 1427acdf5bf4SSatish Balay if (idx) { 1428acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1429acdf5bf4SSatish Balay if (imark > -1) { 1430acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1431bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1432acdf5bf4SSatish Balay } 1433acdf5bf4SSatish Balay } else { 1434acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1435d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1436d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1437acdf5bf4SSatish Balay else break; 1438acdf5bf4SSatish Balay } 1439acdf5bf4SSatish Balay imark = i; 1440acdf5bf4SSatish Balay } 1441d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1442d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1443acdf5bf4SSatish Balay } 1444d64ed03dSBarry Smith } else { 1445d212a18eSSatish Balay if (idx) *idx = 0; 1446d212a18eSSatish Balay if (v) *v = 0; 1447d212a18eSSatish Balay } 1448acdf5bf4SSatish Balay } 1449acdf5bf4SSatish Balay *nz = nztot; 1450f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1451f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14523a40ed3dSBarry Smith PetscFunctionReturn(0); 1453acdf5bf4SSatish Balay } 1454acdf5bf4SSatish Balay 14554a2ae208SSatish Balay #undef __FUNCT__ 14564a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1457b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1458acdf5bf4SSatish Balay { 1459acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1460d64ed03dSBarry Smith 1461d64ed03dSBarry Smith PetscFunctionBegin; 1462abc0a331SBarry Smith if (!baij->getrowactive) { 146329bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1464acdf5bf4SSatish Balay } 1465acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14663a40ed3dSBarry Smith PetscFunctionReturn(0); 1467acdf5bf4SSatish Balay } 1468acdf5bf4SSatish Balay 14694a2ae208SSatish Balay #undef __FUNCT__ 14704a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1471dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 147258667388SSatish Balay { 147358667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1474dfbe8321SBarry Smith PetscErrorCode ierr; 1475d64ed03dSBarry Smith 1476d64ed03dSBarry Smith PetscFunctionBegin; 147758667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 147858667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14793a40ed3dSBarry Smith PetscFunctionReturn(0); 148058667388SSatish Balay } 14810ac07820SSatish Balay 14824a2ae208SSatish Balay #undef __FUNCT__ 14834a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1484dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14850ac07820SSatish Balay { 14864e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14874e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1488dfbe8321SBarry Smith PetscErrorCode ierr; 1489329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14900ac07820SSatish Balay 1491d64ed03dSBarry Smith PetscFunctionBegin; 1492899cda47SBarry Smith info->block_size = (PetscReal)matin->rmap.bs; 14934e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14940e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1495de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14964e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14970e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1498de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 14990ac07820SSatish Balay if (flag == MAT_LOCAL) { 15004e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15014e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 15024e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15034e220ebcSLois Curfman McInnes info->memory = isend[3]; 15044e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15050ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1506d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 15074e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15084e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15094e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15104e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15114e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15120ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1513d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 15144e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15154e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15164e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15174e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15184e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1519d41123aaSBarry Smith } else { 152077431f27SBarry Smith SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 15210ac07820SSatish Balay } 1522899cda47SBarry Smith info->rows_global = (PetscReal)A->rmap.N; 1523899cda47SBarry Smith info->columns_global = (PetscReal)A->cmap.N; 1524899cda47SBarry Smith info->rows_local = (PetscReal)A->rmap.N; 1525899cda47SBarry Smith info->columns_local = (PetscReal)A->cmap.N; 15264e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15274e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15284e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15293a40ed3dSBarry Smith PetscFunctionReturn(0); 15300ac07820SSatish Balay } 15310ac07820SSatish Balay 15324a2ae208SSatish Balay #undef __FUNCT__ 15334a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1534dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 153558667388SSatish Balay { 153658667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1537dfbe8321SBarry Smith PetscErrorCode ierr; 153858667388SSatish Balay 1539d64ed03dSBarry Smith PetscFunctionBegin; 154012c028f9SKris Buschelman switch (op) { 154112c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 154212c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 154312c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 154412c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 154512c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 154612c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 154712c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 154898305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 154998305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155012c028f9SKris Buschelman break; 155112c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15527c922b88SBarry Smith a->roworiented = PETSC_TRUE; 155398305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155498305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155512c028f9SKris Buschelman break; 155612c028f9SKris Buschelman case MAT_ROWS_SORTED: 155712c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 155812c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 1559ae15b995SBarry Smith ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 156012c028f9SKris Buschelman break; 156112c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15627c922b88SBarry Smith a->roworiented = PETSC_FALSE; 156398305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 156498305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 156512c028f9SKris Buschelman break; 156612c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15677c922b88SBarry Smith a->donotstash = PETSC_TRUE; 156812c028f9SKris Buschelman break; 156912c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 157029bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 157112c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15727c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 157312c028f9SKris Buschelman break; 157477e54ba9SKris Buschelman case MAT_SYMMETRIC: 157577e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15762188ac68SBarry Smith case MAT_HERMITIAN: 15772188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 15782188ac68SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 15792188ac68SBarry Smith break; 15809a4540c5SBarry Smith case MAT_NOT_SYMMETRIC: 15819a4540c5SBarry Smith case MAT_NOT_STRUCTURALLY_SYMMETRIC: 15829a4540c5SBarry Smith case MAT_NOT_HERMITIAN: 15839a4540c5SBarry Smith case MAT_NOT_SYMMETRY_ETERNAL: 158477e54ba9SKris Buschelman break; 158512c028f9SKris Buschelman default: 1586ad86a440SBarry Smith SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1587d64ed03dSBarry Smith } 15883a40ed3dSBarry Smith PetscFunctionReturn(0); 158958667388SSatish Balay } 159058667388SSatish Balay 15914a2ae208SSatish Balay #undef __FUNCT__ 15924a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1593dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15940ac07820SSatish Balay { 15950ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15960ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15970ac07820SSatish Balay Mat B; 1598dfbe8321SBarry Smith PetscErrorCode ierr; 1599899cda47SBarry Smith PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1600899cda47SBarry Smith PetscInt bs=A->rmap.bs,mbs=baij->mbs; 16013eda8832SBarry Smith MatScalar *a; 16020ac07820SSatish Balay 1603d64ed03dSBarry Smith PetscFunctionBegin; 160429bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1605f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1606899cda47SBarry Smith ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1607f204ca49SKris Buschelman ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1608899cda47SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 16090ac07820SSatish Balay 16100ac07820SSatish Balay /* copy over the A part */ 16110ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16120ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1613b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 16140ac07820SSatish Balay 16150ac07820SSatish Balay for (i=0; i<mbs; i++) { 1616899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 16170ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16180ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1619899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 16200ac07820SSatish Balay for (k=0; k<bs; k++) { 162193fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16220ac07820SSatish Balay col++; a += bs; 16230ac07820SSatish Balay } 16240ac07820SSatish Balay } 16250ac07820SSatish Balay } 16260ac07820SSatish Balay /* copy over the B part */ 16270ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16280ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16290ac07820SSatish Balay for (i=0; i<mbs; i++) { 1630899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 16310ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16320ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16330ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16340ac07820SSatish Balay for (k=0; k<bs; k++) { 163593fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16360ac07820SSatish Balay col++; a += bs; 16370ac07820SSatish Balay } 16380ac07820SSatish Balay } 16390ac07820SSatish Balay } 1640606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16410ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16420ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16430ac07820SSatish Balay 16447c922b88SBarry Smith if (matout) { 16450ac07820SSatish Balay *matout = B; 16460ac07820SSatish Balay } else { 1647273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16480ac07820SSatish Balay } 16493a40ed3dSBarry Smith PetscFunctionReturn(0); 16500ac07820SSatish Balay } 16510e95ebc0SSatish Balay 16524a2ae208SSatish Balay #undef __FUNCT__ 16534a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1654dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16550e95ebc0SSatish Balay { 165636c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 165736c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1658dfbe8321SBarry Smith PetscErrorCode ierr; 1659b24ad042SBarry Smith PetscInt s1,s2,s3; 16600e95ebc0SSatish Balay 1661d64ed03dSBarry Smith PetscFunctionBegin; 166236c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 166336c4a09eSSatish Balay if (rr) { 166436c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 166529bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 166636c4a09eSSatish Balay /* Overlap communication with computation. */ 166736c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 166836c4a09eSSatish Balay } 16690e95ebc0SSatish Balay if (ll) { 16700e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 167129bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1672a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16730e95ebc0SSatish Balay } 167436c4a09eSSatish Balay /* scale the diagonal block */ 167536c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 167636c4a09eSSatish Balay 167736c4a09eSSatish Balay if (rr) { 167836c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 167936c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1680a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 168136c4a09eSSatish Balay } 168236c4a09eSSatish Balay 16833a40ed3dSBarry Smith PetscFunctionReturn(0); 16840e95ebc0SSatish Balay } 16850e95ebc0SSatish Balay 16864a2ae208SSatish Balay #undef __FUNCT__ 16874a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1688f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 16890ac07820SSatish Balay { 16900ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16916849ba73SBarry Smith PetscErrorCode ierr; 1692b24ad042SBarry Smith PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1693899cda47SBarry Smith PetscInt i,*owners = A->rmap.range; 1694b24ad042SBarry Smith PetscInt *nprocs,j,idx,nsends,row; 1695b24ad042SBarry Smith PetscInt nmax,*svalues,*starts,*owner,nrecvs; 16966543fbbaSBarry Smith PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1697357c27ecSBarry Smith PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 16980ac07820SSatish Balay MPI_Comm comm = A->comm; 16990ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17000ac07820SSatish Balay MPI_Status recv_status,*send_status; 17016543fbbaSBarry Smith #if defined(PETSC_DEBUG) 17026543fbbaSBarry Smith PetscTruth found = PETSC_FALSE; 17036543fbbaSBarry Smith #endif 17040ac07820SSatish Balay 1705d64ed03dSBarry Smith PetscFunctionBegin; 17060ac07820SSatish Balay /* first count number of contributors to each processor */ 1707b24ad042SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1708b24ad042SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1709b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 17106543fbbaSBarry Smith j = 0; 17110ac07820SSatish Balay for (i=0; i<N; i++) { 17126543fbbaSBarry Smith if (lastidx > (idx = rows[i])) j = 0; 17136543fbbaSBarry Smith lastidx = idx; 17146543fbbaSBarry Smith for (; j<size; j++) { 1715357c27ecSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 17166543fbbaSBarry Smith nprocs[2*j]++; 17176543fbbaSBarry Smith nprocs[2*j+1] = 1; 17186543fbbaSBarry Smith owner[i] = j; 17196543fbbaSBarry Smith #if defined(PETSC_DEBUG) 17206543fbbaSBarry Smith found = PETSC_TRUE; 17216543fbbaSBarry Smith #endif 17226543fbbaSBarry Smith break; 17230ac07820SSatish Balay } 17240ac07820SSatish Balay } 17256543fbbaSBarry Smith #if defined(PETSC_DEBUG) 172629bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 17276543fbbaSBarry Smith found = PETSC_FALSE; 17286543fbbaSBarry Smith #endif 17290ac07820SSatish Balay } 1730c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 17310ac07820SSatish Balay 17320ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1733c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 17340ac07820SSatish Balay 17350ac07820SSatish Balay /* post receives: */ 1736b24ad042SBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1737b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17380ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1739b24ad042SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17400ac07820SSatish Balay } 17410ac07820SSatish Balay 17420ac07820SSatish Balay /* do sends: 17430ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17440ac07820SSatish Balay the ith processor 17450ac07820SSatish Balay */ 1746b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1747b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1748b24ad042SBarry Smith ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 17490ac07820SSatish Balay starts[0] = 0; 1750c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17510ac07820SSatish Balay for (i=0; i<N; i++) { 17520ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17530ac07820SSatish Balay } 17540ac07820SSatish Balay 17550ac07820SSatish Balay starts[0] = 0; 1756c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17570ac07820SSatish Balay count = 0; 17580ac07820SSatish Balay for (i=0; i<size; i++) { 1759c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1760b24ad042SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17610ac07820SSatish Balay } 17620ac07820SSatish Balay } 1763606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17640ac07820SSatish Balay 1765357c27ecSBarry Smith base = owners[rank]; 17660ac07820SSatish Balay 17670ac07820SSatish Balay /* wait on receives */ 1768b24ad042SBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 17690ac07820SSatish Balay source = lens + nrecvs; 17700ac07820SSatish Balay count = nrecvs; slen = 0; 17710ac07820SSatish Balay while (count) { 1772ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17730ac07820SSatish Balay /* unpack receives into our local space */ 1774b24ad042SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 17750ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17760ac07820SSatish Balay lens[imdex] = n; 17770ac07820SSatish Balay slen += n; 17780ac07820SSatish Balay count--; 17790ac07820SSatish Balay } 1780606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17810ac07820SSatish Balay 17820ac07820SSatish Balay /* move the data into the send scatter */ 1783b24ad042SBarry Smith ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 17840ac07820SSatish Balay count = 0; 17850ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17860ac07820SSatish Balay values = rvalues + i*nmax; 17870ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17880ac07820SSatish Balay lrows[count++] = values[j] - base; 17890ac07820SSatish Balay } 17900ac07820SSatish Balay } 1791606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1792606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1793606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1794606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17950ac07820SSatish Balay 17960ac07820SSatish Balay /* actually zap the local rows */ 179772dacd9aSBarry Smith /* 179872dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 1799a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 180072dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 180172dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 180272dacd9aSBarry Smith 1803f4df32b1SMatthew Knepley Contributed by: Matthew Knepley 180472dacd9aSBarry Smith */ 18059c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1806f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1807899cda47SBarry Smith if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1808f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1809f4df32b1SMatthew Knepley } else if (diag != 0.0) { 1810f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1811fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 181229bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1813fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18146525c446SSatish Balay } 1815a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1816a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1817f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1818a07cd24cSSatish Balay } 1819a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18219c957beeSSatish Balay } else { 1822f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1823a07cd24cSSatish Balay } 18249c957beeSSatish Balay 1825606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1826a07cd24cSSatish Balay 18270ac07820SSatish Balay /* wait on sends */ 18280ac07820SSatish Balay if (nsends) { 182982502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1830ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1831606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18320ac07820SSatish Balay } 1833606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1834606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18350ac07820SSatish Balay 18363a40ed3dSBarry Smith PetscFunctionReturn(0); 18370ac07820SSatish Balay } 183872dacd9aSBarry Smith 18394a2ae208SSatish Balay #undef __FUNCT__ 18404a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1841dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1842ba4ca20aSSatish Balay { 1843ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 184425fdafccSSatish Balay MPI_Comm comm = A->comm; 1845b24ad042SBarry Smith static PetscTruth called = PETSC_FALSE; 1846dfbe8321SBarry Smith PetscErrorCode ierr; 1847ba4ca20aSSatish Balay 1848d64ed03dSBarry Smith PetscFunctionBegin; 18493a40ed3dSBarry Smith if (!a->rank) { 18503a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 185125fdafccSSatish Balay } 1852b24ad042SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1853d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1854d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18553a40ed3dSBarry Smith PetscFunctionReturn(0); 1856ba4ca20aSSatish Balay } 18570ac07820SSatish Balay 18584a2ae208SSatish Balay #undef __FUNCT__ 18594a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1860dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1861bb5a7306SBarry Smith { 1862bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1863dfbe8321SBarry Smith PetscErrorCode ierr; 1864d64ed03dSBarry Smith 1865d64ed03dSBarry Smith PetscFunctionBegin; 1866bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18673a40ed3dSBarry Smith PetscFunctionReturn(0); 1868bb5a7306SBarry Smith } 1869bb5a7306SBarry Smith 18706849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18710ac07820SSatish Balay 18724a2ae208SSatish Balay #undef __FUNCT__ 18734a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1874dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18757fc3c18eSBarry Smith { 18767fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18777fc3c18eSBarry Smith Mat a,b,c,d; 18787fc3c18eSBarry Smith PetscTruth flg; 1879dfbe8321SBarry Smith PetscErrorCode ierr; 18807fc3c18eSBarry Smith 18817fc3c18eSBarry Smith PetscFunctionBegin; 18827fc3c18eSBarry Smith a = matA->A; b = matA->B; 18837fc3c18eSBarry Smith c = matB->A; d = matB->B; 18847fc3c18eSBarry Smith 18857fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1886abc0a331SBarry Smith if (flg) { 18877fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18887fc3c18eSBarry Smith } 18897fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18907fc3c18eSBarry Smith PetscFunctionReturn(0); 18917fc3c18eSBarry Smith } 18927fc3c18eSBarry Smith 18933c896bc6SHong Zhang #undef __FUNCT__ 18943c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ" 18953c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 18963c896bc6SHong Zhang { 18973c896bc6SHong Zhang PetscErrorCode ierr; 18983c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 18993c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 19003c896bc6SHong Zhang 19013c896bc6SHong Zhang PetscFunctionBegin; 19023c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 19033c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 19043c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 19053c896bc6SHong Zhang } else { 19063c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 19073c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 19083c896bc6SHong Zhang } 19093c896bc6SHong Zhang PetscFunctionReturn(0); 19103c896bc6SHong Zhang } 1911273d9f13SBarry Smith 19124a2ae208SSatish Balay #undef __FUNCT__ 19134a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1914dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1915273d9f13SBarry Smith { 1916dfbe8321SBarry Smith PetscErrorCode ierr; 1917273d9f13SBarry Smith 1918273d9f13SBarry Smith PetscFunctionBegin; 19197edd0491SSatish Balay ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1920273d9f13SBarry Smith PetscFunctionReturn(0); 1921273d9f13SBarry Smith } 1922273d9f13SBarry Smith 19234fe895cdSHong Zhang #include "petscblaslapack.h" 19244fe895cdSHong Zhang #undef __FUNCT__ 19254fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ" 19264fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 19274fe895cdSHong Zhang { 19284fe895cdSHong Zhang PetscErrorCode ierr; 19294fe895cdSHong Zhang Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 19304fe895cdSHong Zhang PetscBLASInt bnz,one=1; 19314fe895cdSHong Zhang Mat_SeqBAIJ *x,*y; 19324fe895cdSHong Zhang 19334fe895cdSHong Zhang PetscFunctionBegin; 19344fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 19354fe895cdSHong Zhang PetscScalar alpha = a; 19364fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->A->data; 19374fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->A->data; 19384fe895cdSHong Zhang bnz = (PetscBLASInt)x->nz; 19394fe895cdSHong Zhang BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 19404fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->B->data; 19414fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->B->data; 19424fe895cdSHong Zhang bnz = (PetscBLASInt)x->nz; 19434fe895cdSHong Zhang BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 19444fe895cdSHong Zhang } else { 19454fe895cdSHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 19464fe895cdSHong Zhang } 19474fe895cdSHong Zhang PetscFunctionReturn(0); 19484fe895cdSHong Zhang } 19494fe895cdSHong Zhang 195099cafbc1SBarry Smith #undef __FUNCT__ 195199cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ" 195299cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 195399cafbc1SBarry Smith { 195499cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 195599cafbc1SBarry Smith PetscErrorCode ierr; 195699cafbc1SBarry Smith 195799cafbc1SBarry Smith PetscFunctionBegin; 195899cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 195999cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 196099cafbc1SBarry Smith PetscFunctionReturn(0); 196199cafbc1SBarry Smith } 196299cafbc1SBarry Smith 196399cafbc1SBarry Smith #undef __FUNCT__ 196499cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 196599cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 196699cafbc1SBarry Smith { 196799cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 196899cafbc1SBarry Smith PetscErrorCode ierr; 196999cafbc1SBarry Smith 197099cafbc1SBarry Smith PetscFunctionBegin; 197199cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 197299cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 197399cafbc1SBarry Smith PetscFunctionReturn(0); 197499cafbc1SBarry Smith } 197599cafbc1SBarry Smith 197679bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1977cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1978cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1979cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1980cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1981cc2dc46cSBarry Smith MatMult_MPIBAIJ, 198297304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 19837c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19847c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1985cc2dc46cSBarry Smith 0, 1986cc2dc46cSBarry Smith 0, 1987cc2dc46cSBarry Smith 0, 198897304618SKris Buschelman /*10*/ 0, 1989cc2dc46cSBarry Smith 0, 1990cc2dc46cSBarry Smith 0, 1991cc2dc46cSBarry Smith 0, 1992cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 199397304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 19947fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1995cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1996cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1997cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 199897304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 1999cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 2000cc2dc46cSBarry Smith 0, 2001cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 2002cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 200397304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ, 2004cc2dc46cSBarry Smith 0, 2005cc2dc46cSBarry Smith 0, 2006cc2dc46cSBarry Smith 0, 2007cc2dc46cSBarry Smith 0, 200897304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ, 2009273d9f13SBarry Smith 0, 2010cc2dc46cSBarry Smith 0, 2011cc2dc46cSBarry Smith 0, 2012cc2dc46cSBarry Smith 0, 201397304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ, 2014cc2dc46cSBarry Smith 0, 2015cc2dc46cSBarry Smith 0, 2016cc2dc46cSBarry Smith 0, 2017cc2dc46cSBarry Smith 0, 20184fe895cdSHong Zhang /*40*/ MatAXPY_MPIBAIJ, 2019cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 2020cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 2021cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 20223c896bc6SHong Zhang MatCopy_MPIBAIJ, 202397304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ, 2024cc2dc46cSBarry Smith MatScale_MPIBAIJ, 2025cc2dc46cSBarry Smith 0, 2026cc2dc46cSBarry Smith 0, 2027cc2dc46cSBarry Smith 0, 2028521d7252SBarry Smith /*50*/ 0, 2029cc2dc46cSBarry Smith 0, 2030cc2dc46cSBarry Smith 0, 2031cc2dc46cSBarry Smith 0, 2032cc2dc46cSBarry Smith 0, 203397304618SKris Buschelman /*55*/ 0, 2034cc2dc46cSBarry Smith 0, 2035cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 2036cc2dc46cSBarry Smith 0, 2037cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 203897304618SKris Buschelman /*60*/ 0, 2039f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2040f14a1c24SBarry Smith MatView_MPIBAIJ, 2041357abbc8SBarry Smith 0, 20427843d17aSBarry Smith 0, 204397304618SKris Buschelman /*65*/ 0, 20447843d17aSBarry Smith 0, 20457843d17aSBarry Smith 0, 20467843d17aSBarry Smith 0, 20477843d17aSBarry Smith 0, 204897304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ, 20497843d17aSBarry Smith 0, 205097304618SKris Buschelman 0, 205197304618SKris Buschelman 0, 205297304618SKris Buschelman 0, 205397304618SKris Buschelman /*75*/ 0, 205497304618SKris Buschelman 0, 205597304618SKris Buschelman 0, 205697304618SKris Buschelman 0, 205797304618SKris Buschelman 0, 205897304618SKris Buschelman /*80*/ 0, 205997304618SKris Buschelman 0, 206097304618SKris Buschelman 0, 206197304618SKris Buschelman 0, 2062865e5f61SKris Buschelman MatLoad_MPIBAIJ, 2063865e5f61SKris Buschelman /*85*/ 0, 2064865e5f61SKris Buschelman 0, 2065865e5f61SKris Buschelman 0, 2066865e5f61SKris Buschelman 0, 2067865e5f61SKris Buschelman 0, 2068865e5f61SKris Buschelman /*90*/ 0, 2069865e5f61SKris Buschelman 0, 2070865e5f61SKris Buschelman 0, 2071865e5f61SKris Buschelman 0, 2072865e5f61SKris Buschelman 0, 2073865e5f61SKris Buschelman /*95*/ 0, 2074865e5f61SKris Buschelman 0, 2075865e5f61SKris Buschelman 0, 207699cafbc1SBarry Smith 0, 207799cafbc1SBarry Smith 0, 207899cafbc1SBarry Smith /*100*/0, 207999cafbc1SBarry Smith 0, 208099cafbc1SBarry Smith 0, 208199cafbc1SBarry Smith 0, 208299cafbc1SBarry Smith 0, 208399cafbc1SBarry Smith /*105*/0, 208499cafbc1SBarry Smith MatRealPart_MPIBAIJ, 208599cafbc1SBarry Smith MatImaginaryPart_MPIBAIJ}; 208679bdfe76SSatish Balay 20875ef9f2a5SBarry Smith 2088e18c124aSSatish Balay EXTERN_C_BEGIN 20894a2ae208SSatish Balay #undef __FUNCT__ 20904a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2091be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 20925ef9f2a5SBarry Smith { 20935ef9f2a5SBarry Smith PetscFunctionBegin; 20945ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 20955ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 20965ef9f2a5SBarry Smith PetscFunctionReturn(0); 20975ef9f2a5SBarry Smith } 2098e18c124aSSatish Balay EXTERN_C_END 209979bdfe76SSatish Balay 2100273d9f13SBarry Smith EXTERN_C_BEGIN 2101f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2102d94109b8SHong Zhang EXTERN_C_END 2103d94109b8SHong Zhang 2104aac34f13SBarry Smith #undef __FUNCT__ 2105aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2106*b7940d39SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2107aac34f13SBarry Smith { 2108899cda47SBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2109899cda47SBarry Smith PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2110899cda47SBarry Smith PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2111aac34f13SBarry Smith const PetscInt *JJ; 2112aac34f13SBarry Smith PetscScalar *values; 2113aac34f13SBarry Smith PetscErrorCode ierr; 2114aac34f13SBarry Smith 2115aac34f13SBarry Smith PetscFunctionBegin; 2116aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2117*b7940d39SSatish Balay if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2118aac34f13SBarry Smith #endif 2119aac34f13SBarry Smith ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2120aac34f13SBarry Smith o_nnz = d_nnz + m; 2121aac34f13SBarry Smith 2122aac34f13SBarry Smith for (i=0; i<m; i++) { 2123*b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 2124*b7940d39SSatish Balay JJ = J + Ii[i]; 2125aac34f13SBarry Smith nnz_max = PetscMax(nnz_max,nnz); 2126aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2127aac34f13SBarry Smith if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2128aac34f13SBarry Smith #endif 2129aac34f13SBarry Smith for (j=0; j<nnz; j++) { 2130aac34f13SBarry Smith if (*JJ >= cstart) break; 2131aac34f13SBarry Smith JJ++; 2132aac34f13SBarry Smith } 2133aac34f13SBarry Smith d = 0; 2134aac34f13SBarry Smith for (; j<nnz; j++) { 2135aac34f13SBarry Smith if (*JJ++ >= cend) break; 2136aac34f13SBarry Smith d++; 2137aac34f13SBarry Smith } 2138aac34f13SBarry Smith d_nnz[i] = d; 2139aac34f13SBarry Smith o_nnz[i] = nnz - d; 2140aac34f13SBarry Smith } 2141aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2142aac34f13SBarry Smith ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2143aac34f13SBarry Smith 2144aac34f13SBarry Smith if (v) values = (PetscScalar*)v; 2145aac34f13SBarry Smith else { 2146aac34f13SBarry Smith ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2147aac34f13SBarry Smith ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2148aac34f13SBarry Smith } 2149aac34f13SBarry Smith 2150aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2151aac34f13SBarry Smith for (i=0; i<m; i++) { 2152aac34f13SBarry Smith ii = i + rstart; 2153*b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 2154*b7940d39SSatish Balay ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2155aac34f13SBarry Smith } 2156aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2159aac34f13SBarry Smith 2160aac34f13SBarry Smith if (!v) { 2161aac34f13SBarry Smith ierr = PetscFree(values);CHKERRQ(ierr); 2162aac34f13SBarry Smith } 2163aac34f13SBarry Smith PetscFunctionReturn(0); 2164aac34f13SBarry Smith } 2165aac34f13SBarry Smith 2166aac34f13SBarry Smith #undef __FUNCT__ 2167aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2168aac34f13SBarry Smith /*@C 2169aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2170aac34f13SBarry Smith (the default parallel PETSc format). 2171aac34f13SBarry Smith 2172aac34f13SBarry Smith Collective on MPI_Comm 2173aac34f13SBarry Smith 2174aac34f13SBarry Smith Input Parameters: 2175aac34f13SBarry Smith + A - the matrix 2176aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2177aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2178aac34f13SBarry Smith - v - optional values in the matrix 2179aac34f13SBarry Smith 2180aac34f13SBarry Smith Level: developer 2181aac34f13SBarry Smith 2182aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 2183aac34f13SBarry Smith 2184aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2185aac34f13SBarry Smith @*/ 2186be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2187aac34f13SBarry Smith { 2188aac34f13SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2189aac34f13SBarry Smith 2190aac34f13SBarry Smith PetscFunctionBegin; 2191aac34f13SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2192aac34f13SBarry Smith if (f) { 2193aac34f13SBarry Smith ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2194aac34f13SBarry Smith } 2195aac34f13SBarry Smith PetscFunctionReturn(0); 2196aac34f13SBarry Smith } 2197aac34f13SBarry Smith 2198d94109b8SHong Zhang EXTERN_C_BEGIN 21994a2ae208SSatish Balay #undef __FUNCT__ 2200a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2201be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2202a23d5eceSKris Buschelman { 2203a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2204dfbe8321SBarry Smith PetscErrorCode ierr; 2205b24ad042SBarry Smith PetscInt i; 2206a23d5eceSKris Buschelman 2207a23d5eceSKris Buschelman PetscFunctionBegin; 2208a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 2209a23d5eceSKris Buschelman ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2210a23d5eceSKris Buschelman 2211a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2212a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2213a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 221477431f27SBarry Smith if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 221577431f27SBarry Smith if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2216899cda47SBarry Smith 2217899cda47SBarry Smith B->rmap.bs = bs; 2218899cda47SBarry Smith B->cmap.bs = bs; 2219899cda47SBarry Smith ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2220899cda47SBarry Smith ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2221899cda47SBarry Smith 2222a23d5eceSKris Buschelman if (d_nnz) { 2223899cda47SBarry Smith for (i=0; i<B->rmap.n/bs; i++) { 222477431f27SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2225a23d5eceSKris Buschelman } 2226a23d5eceSKris Buschelman } 2227a23d5eceSKris Buschelman if (o_nnz) { 2228899cda47SBarry Smith for (i=0; i<B->rmap.n/bs; i++) { 222977431f27SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2230a23d5eceSKris Buschelman } 2231a23d5eceSKris Buschelman } 2232a23d5eceSKris Buschelman 2233a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2234a23d5eceSKris Buschelman b->bs2 = bs*bs; 2235899cda47SBarry Smith b->mbs = B->rmap.n/bs; 2236899cda47SBarry Smith b->nbs = B->cmap.n/bs; 2237899cda47SBarry Smith b->Mbs = B->rmap.N/bs; 2238899cda47SBarry Smith b->Nbs = B->cmap.N/bs; 2239a23d5eceSKris Buschelman 2240a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2241899cda47SBarry Smith b->rangebs[i] = B->rmap.range[i]/bs; 2242a23d5eceSKris Buschelman } 2243899cda47SBarry Smith b->rstartbs = B->rmap.rstart/bs; 2244899cda47SBarry Smith b->rendbs = B->rmap.rend/bs; 2245899cda47SBarry Smith b->cstartbs = B->cmap.rstart/bs; 2246899cda47SBarry Smith b->cendbs = B->cmap.rend/bs; 2247a23d5eceSKris Buschelman 2248f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2249899cda47SBarry Smith ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 22509c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2251c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 225252e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2253f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2254899cda47SBarry Smith ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 22559c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2256c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 225752e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2258c60e587dSKris Buschelman 2259a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2260a23d5eceSKris Buschelman 2261a23d5eceSKris Buschelman PetscFunctionReturn(0); 2262a23d5eceSKris Buschelman } 2263a23d5eceSKris Buschelman EXTERN_C_END 2264a23d5eceSKris Buschelman 2265a23d5eceSKris Buschelman EXTERN_C_BEGIN 2266be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2267be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 226892b32695SKris Buschelman EXTERN_C_END 22695bf65638SKris Buschelman 22700bad9183SKris Buschelman /*MC 2271fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 22720bad9183SKris Buschelman 22730bad9183SKris Buschelman Options Database Keys: 22740bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 22750bad9183SKris Buschelman 22760bad9183SKris Buschelman Level: beginner 22770bad9183SKris Buschelman 22780bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 22790bad9183SKris Buschelman M*/ 22800bad9183SKris Buschelman 228192b32695SKris Buschelman EXTERN_C_BEGIN 2282a23d5eceSKris Buschelman #undef __FUNCT__ 22834a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2284be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2285273d9f13SBarry Smith { 2286273d9f13SBarry Smith Mat_MPIBAIJ *b; 2287dfbe8321SBarry Smith PetscErrorCode ierr; 2288273d9f13SBarry Smith PetscTruth flg; 2289273d9f13SBarry Smith 2290273d9f13SBarry Smith PetscFunctionBegin; 229182502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 229282502324SSatish Balay B->data = (void*)b; 229382502324SSatish Balay 2294085a36d4SBarry Smith 2295273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2296273d9f13SBarry Smith B->mapping = 0; 2297273d9f13SBarry Smith B->factor = 0; 2298273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2299273d9f13SBarry Smith 2300273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2301273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2302273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2303273d9f13SBarry Smith 2304273d9f13SBarry Smith /* build local table of row and column ownerships */ 2305899cda47SBarry Smith ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2306273d9f13SBarry Smith 2307273d9f13SBarry Smith /* build cache for off array entries formed */ 2308273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2309273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2310273d9f13SBarry Smith b->colmap = PETSC_NULL; 2311273d9f13SBarry Smith b->garray = PETSC_NULL; 2312273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2313273d9f13SBarry Smith 2314cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2315273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 231664a35ccbSBarry Smith b->setvalueslen = 0; 2317273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2318273d9f13SBarry Smith #endif 2319273d9f13SBarry Smith 2320273d9f13SBarry Smith /* stuff used in block assembly */ 2321273d9f13SBarry Smith b->barray = 0; 2322273d9f13SBarry Smith 2323273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2324273d9f13SBarry Smith b->lvec = 0; 2325273d9f13SBarry Smith b->Mvctx = 0; 2326273d9f13SBarry Smith 2327273d9f13SBarry Smith /* stuff for MatGetRow() */ 2328273d9f13SBarry Smith b->rowindices = 0; 2329273d9f13SBarry Smith b->rowvalues = 0; 2330273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2331273d9f13SBarry Smith 2332273d9f13SBarry Smith /* hash table stuff */ 2333273d9f13SBarry Smith b->ht = 0; 2334273d9f13SBarry Smith b->hd = 0; 2335273d9f13SBarry Smith b->ht_size = 0; 2336273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2337273d9f13SBarry Smith b->ht_fact = 0; 2338273d9f13SBarry Smith b->ht_total_ct = 0; 2339273d9f13SBarry Smith b->ht_insert_ct = 0; 2340273d9f13SBarry Smith 2341b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2342273d9f13SBarry Smith if (flg) { 2343f6275e2eSBarry Smith PetscReal fact = 1.39; 2344273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 234587828ca2SBarry Smith ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2346273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2347273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2348ae15b995SBarry Smith ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2349273d9f13SBarry Smith } 2350273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2351273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2352273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2353273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2354273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2355273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2356273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2357273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2358273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2359a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2360a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2361a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2362aac34f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2363aac34f13SBarry Smith "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2364aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 236592b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 236692b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 236792b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 23685bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 23695bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 23705bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2371273d9f13SBarry Smith PetscFunctionReturn(0); 2372273d9f13SBarry Smith } 2373273d9f13SBarry Smith EXTERN_C_END 2374273d9f13SBarry Smith 2375209238afSKris Buschelman /*MC 2376002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2377209238afSKris Buschelman 2378209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2379209238afSKris Buschelman and MATMPIBAIJ otherwise. 2380209238afSKris Buschelman 2381209238afSKris Buschelman Options Database Keys: 2382209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2383209238afSKris Buschelman 2384209238afSKris Buschelman Level: beginner 2385209238afSKris Buschelman 2386aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2387209238afSKris Buschelman M*/ 2388209238afSKris Buschelman 2389209238afSKris Buschelman EXTERN_C_BEGIN 2390209238afSKris Buschelman #undef __FUNCT__ 2391209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ" 2392be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2393dfbe8321SBarry Smith { 23946849ba73SBarry Smith PetscErrorCode ierr; 2395b24ad042SBarry Smith PetscMPIInt size; 2396209238afSKris Buschelman 2397209238afSKris Buschelman PetscFunctionBegin; 2398209238afSKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2399209238afSKris Buschelman ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2400209238afSKris Buschelman if (size == 1) { 2401209238afSKris Buschelman ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2402209238afSKris Buschelman } else { 2403209238afSKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2404209238afSKris Buschelman } 2405209238afSKris Buschelman PetscFunctionReturn(0); 2406209238afSKris Buschelman } 2407209238afSKris Buschelman EXTERN_C_END 2408209238afSKris Buschelman 24094a2ae208SSatish Balay #undef __FUNCT__ 24104a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2411273d9f13SBarry Smith /*@C 2412aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2413273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2414273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2415273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2416273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2417273d9f13SBarry Smith 2418273d9f13SBarry Smith Collective on Mat 2419273d9f13SBarry Smith 2420273d9f13SBarry Smith Input Parameters: 2421273d9f13SBarry Smith + A - the matrix 2422273d9f13SBarry Smith . bs - size of blockk 2423273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2424273d9f13SBarry Smith submatrix (same for all local rows) 2425273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2426273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2427273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2428273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2429273d9f13SBarry Smith submatrix (same for all local rows). 2430273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2431273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2432273d9f13SBarry Smith each block row) or PETSC_NULL. 2433273d9f13SBarry Smith 243449a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 2435273d9f13SBarry Smith 2436273d9f13SBarry Smith Options Database Keys: 2437273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2438273d9f13SBarry Smith block calculations (much slower) 2439273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2440273d9f13SBarry Smith 2441273d9f13SBarry Smith Notes: 2442273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2443273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2444273d9f13SBarry Smith 2445273d9f13SBarry Smith Storage Information: 2446273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2447273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2448273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2449273d9f13SBarry Smith local matrix (a rectangular submatrix). 2450273d9f13SBarry Smith 2451273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2452273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2453273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2454273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2455273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2456273d9f13SBarry Smith 2457273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2458273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2459273d9f13SBarry Smith 2460273d9f13SBarry Smith .vb 2461273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2462273d9f13SBarry Smith ------------------- 2463273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2464273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2465273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2466273d9f13SBarry Smith ------------------- 2467273d9f13SBarry Smith .ve 2468273d9f13SBarry Smith 2469273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2470273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2471273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2472273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2473273d9f13SBarry Smith 2474273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2475273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2476273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2477273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2478273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2479273d9f13SBarry Smith matrices. 2480273d9f13SBarry Smith 2481273d9f13SBarry Smith Level: intermediate 2482273d9f13SBarry Smith 2483273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2484273d9f13SBarry Smith 2485aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2486273d9f13SBarry Smith @*/ 2487be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2488273d9f13SBarry Smith { 2489b24ad042SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2490273d9f13SBarry Smith 2491273d9f13SBarry Smith PetscFunctionBegin; 2492a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2493a23d5eceSKris Buschelman if (f) { 2494a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2495273d9f13SBarry Smith } 2496273d9f13SBarry Smith PetscFunctionReturn(0); 2497273d9f13SBarry Smith } 2498273d9f13SBarry Smith 24994a2ae208SSatish Balay #undef __FUNCT__ 25004a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 250179bdfe76SSatish Balay /*@C 250279bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 250379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 250479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 250579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 250679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 250779bdfe76SSatish Balay 2508db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2509db81eaa0SLois Curfman McInnes 251079bdfe76SSatish Balay Input Parameters: 2511db81eaa0SLois Curfman McInnes + comm - MPI communicator 251279bdfe76SSatish Balay . bs - size of blockk 251379bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 251492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 251592e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 251692e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 251792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 251892e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2519be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2520be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 252147a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 252279bdfe76SSatish Balay submatrix (same for all local rows) 252347a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 252492e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2525db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 252647a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 252779bdfe76SSatish Balay submatrix (same for all local rows). 252847a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 252992e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 253092e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 253179bdfe76SSatish Balay 253279bdfe76SSatish Balay Output Parameter: 253379bdfe76SSatish Balay . A - the matrix 253479bdfe76SSatish Balay 2535db81eaa0SLois Curfman McInnes Options Database Keys: 2536db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2537db81eaa0SLois Curfman McInnes block calculations (much slower) 2538db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 25393ffaccefSLois Curfman McInnes 2540b259b22eSLois Curfman McInnes Notes: 254149a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 254249a6f317SBarry Smith 254347a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 254447a75d0bSBarry Smith 254579bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 254679bdfe76SSatish Balay (possibly both). 254779bdfe76SSatish Balay 2548be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2549be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2550be79a94dSBarry Smith 255179bdfe76SSatish Balay Storage Information: 255279bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 255379bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 255479bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 255579bdfe76SSatish Balay local matrix (a rectangular submatrix). 255679bdfe76SSatish Balay 255779bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 255879bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 255979bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 256079bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 256179bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 256279bdfe76SSatish Balay 256379bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 256479bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 256579bdfe76SSatish Balay 2566db81eaa0SLois Curfman McInnes .vb 2567db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2568db81eaa0SLois Curfman McInnes ------------------- 2569db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2570db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2571db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2572db81eaa0SLois Curfman McInnes ------------------- 2573db81eaa0SLois Curfman McInnes .ve 257479bdfe76SSatish Balay 257579bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 257679bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 257779bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 257857b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 257979bdfe76SSatish Balay 2580d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2581d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 258279bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 258392e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 258492e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 25856da5968aSLois Curfman McInnes matrices. 258679bdfe76SSatish Balay 2587027ccd11SLois Curfman McInnes Level: intermediate 2588027ccd11SLois Curfman McInnes 258992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 259079bdfe76SSatish Balay 2591aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 259279bdfe76SSatish Balay @*/ 2593be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 259479bdfe76SSatish Balay { 25956849ba73SBarry Smith PetscErrorCode ierr; 2596b24ad042SBarry Smith PetscMPIInt size; 259779bdfe76SSatish Balay 2598d64ed03dSBarry Smith PetscFunctionBegin; 2599f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2600f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2601d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2602273d9f13SBarry Smith if (size > 1) { 2603273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2604273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2605273d9f13SBarry Smith } else { 2606273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2607273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 26083914022bSBarry Smith } 26093a40ed3dSBarry Smith PetscFunctionReturn(0); 261079bdfe76SSatish Balay } 2611026e39d0SSatish Balay 26124a2ae208SSatish Balay #undef __FUNCT__ 26134a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 26146849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 26150ac07820SSatish Balay { 26160ac07820SSatish Balay Mat mat; 26170ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2618dfbe8321SBarry Smith PetscErrorCode ierr; 2619b24ad042SBarry Smith PetscInt len=0; 26200ac07820SSatish Balay 2621d64ed03dSBarry Smith PetscFunctionBegin; 26220ac07820SSatish Balay *newmat = 0; 2623f69a0ea3SMatthew Knepley ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2624899cda47SBarry Smith ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2625be5d1d56SKris Buschelman ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 26261d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 26277fff6886SHong Zhang 26284beb1cfeSHong Zhang mat->factor = matin->factor; 2629273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 26300ac07820SSatish Balay mat->assembled = PETSC_TRUE; 26317fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 26327fff6886SHong Zhang 2633273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 2634899cda47SBarry Smith mat->rmap.bs = matin->rmap.bs; 26350ac07820SSatish Balay a->bs2 = oldmat->bs2; 26360ac07820SSatish Balay a->mbs = oldmat->mbs; 26370ac07820SSatish Balay a->nbs = oldmat->nbs; 26380ac07820SSatish Balay a->Mbs = oldmat->Mbs; 26390ac07820SSatish Balay a->Nbs = oldmat->Nbs; 26400ac07820SSatish Balay 2641899cda47SBarry Smith ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2642899cda47SBarry Smith ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2643899cda47SBarry Smith 26440ac07820SSatish Balay a->size = oldmat->size; 26450ac07820SSatish Balay a->rank = oldmat->rank; 2646aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2647aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2648aef5e8e0SSatish Balay a->rowindices = 0; 26490ac07820SSatish Balay a->rowvalues = 0; 26500ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 265130793edcSSatish Balay a->barray = 0; 2652899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2653899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2654899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2655899cda47SBarry Smith a->cendbs = oldmat->cendbs; 26560ac07820SSatish Balay 2657133cdb44SSatish Balay /* hash table stuff */ 2658133cdb44SSatish Balay a->ht = 0; 2659133cdb44SSatish Balay a->hd = 0; 2660133cdb44SSatish Balay a->ht_size = 0; 2661133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 266225fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2663133cdb44SSatish Balay a->ht_total_ct = 0; 2664133cdb44SSatish Balay a->ht_insert_ct = 0; 2665133cdb44SSatish Balay 2666899cda47SBarry Smith ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 26678798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2668899cda47SBarry Smith ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 26690ac07820SSatish Balay if (oldmat->colmap) { 2670aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 26710f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 267248e59246SSatish Balay #else 2673b24ad042SBarry Smith ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 267452e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2675b24ad042SBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 267648e59246SSatish Balay #endif 26770ac07820SSatish Balay } else a->colmap = 0; 26784beb1cfeSHong Zhang 26790ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2680b24ad042SBarry Smith ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 268152e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2682b24ad042SBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 26830ac07820SSatish Balay } else a->garray = 0; 26840ac07820SSatish Balay 26850ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 268652e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 26870ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 268852e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 26897fff6886SHong Zhang 26902e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 269152e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 26922e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 269352e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2694b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 26950ac07820SSatish Balay *newmat = mat; 26964beb1cfeSHong Zhang 26973a40ed3dSBarry Smith PetscFunctionReturn(0); 26980ac07820SSatish Balay } 269957b952d6SSatish Balay 2700e090d566SSatish Balay #include "petscsys.h" 270157b952d6SSatish Balay 27024a2ae208SSatish Balay #undef __FUNCT__ 27034a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2704f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 270557b952d6SSatish Balay { 270657b952d6SSatish Balay Mat A; 27076849ba73SBarry Smith PetscErrorCode ierr; 2708b24ad042SBarry Smith int fd; 2709b24ad042SBarry Smith PetscInt i,nz,j,rstart,rend; 271087828ca2SBarry Smith PetscScalar *vals,*buf; 271157b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 271257b952d6SSatish Balay MPI_Status status; 2713b24ad042SBarry Smith PetscMPIInt rank,size,maxnz; 2714b24ad042SBarry Smith PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2715910ba992SMatthew Knepley PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2716167e7480SBarry Smith PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2717dc231df0SBarry Smith PetscMPIInt tag = ((PetscObject)viewer)->tag; 2718910ba992SMatthew Knepley PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2719dc231df0SBarry Smith PetscInt dcount,kmax,k,nzcount,tmp,mend; 272057b952d6SSatish Balay 2721d64ed03dSBarry Smith PetscFunctionBegin; 2722b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 272357b952d6SSatish Balay 2724d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2725d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272657b952d6SSatish Balay if (!rank) { 2727b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2728e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2729552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 27306c5fab8fSBarry Smith } 2731d64ed03dSBarry Smith 2732b24ad042SBarry Smith ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 273357b952d6SSatish Balay M = header[1]; N = header[2]; 273457b952d6SSatish Balay 273529bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 273657b952d6SSatish Balay 273757b952d6SSatish Balay /* 273857b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 273957b952d6SSatish Balay divisible by the blocksize 274057b952d6SSatish Balay */ 274157b952d6SSatish Balay Mbs = M/bs; 2742dc231df0SBarry Smith extra_rows = bs - M + bs*Mbs; 274357b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 274457b952d6SSatish Balay else Mbs++; 274557b952d6SSatish Balay if (extra_rows && !rank) { 2746ae15b995SBarry Smith ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 274757b952d6SSatish Balay } 2748537820f0SBarry Smith 274957b952d6SSatish Balay /* determine ownership of all rows */ 275057b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 275157b952d6SSatish Balay m = mbs*bs; 2752dc231df0SBarry Smith ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2753b24ad042SBarry Smith ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2754167e7480SBarry Smith 2755167e7480SBarry Smith /* process 0 needs enough room for process with most rows */ 2756167e7480SBarry Smith if (!rank) { 2757167e7480SBarry Smith mmax = rowners[1]; 2758167e7480SBarry Smith for (i=2; i<size; i++) { 2759167e7480SBarry Smith mmax = PetscMax(mmax,rowners[i]); 2760167e7480SBarry Smith } 2761ca02efcfSSatish Balay mmax*=bs; 2762167e7480SBarry Smith } else mmax = m; 2763167e7480SBarry Smith 276457b952d6SSatish Balay rowners[0] = 0; 2765cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2766cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 276757b952d6SSatish Balay rstart = rowners[rank]; 276857b952d6SSatish Balay rend = rowners[rank+1]; 276957b952d6SSatish Balay 277057b952d6SSatish Balay /* distribute row lengths to all processors */ 277119c38ff2SBarry Smith ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 277257b952d6SSatish Balay if (!rank) { 2773dc231df0SBarry Smith mend = m; 2774dc231df0SBarry Smith if (size == 1) mend = mend - extra_rows; 2775dc231df0SBarry Smith ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2776dc231df0SBarry Smith for (j=mend; j<m; j++) locrowlens[j] = 1; 2777dc231df0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2778b24ad042SBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2779b24ad042SBarry Smith ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2780dc231df0SBarry Smith for (j=0; j<m; j++) { 2781dc231df0SBarry Smith procsnz[0] += locrowlens[j]; 2782dc231df0SBarry Smith } 2783dc231df0SBarry Smith for (i=1; i<size; i++) { 2784dc231df0SBarry Smith mend = browners[i+1] - browners[i]; 2785dc231df0SBarry Smith if (i == size-1) mend = mend - extra_rows; 2786dc231df0SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2787dc231df0SBarry Smith for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2788dc231df0SBarry Smith /* calculate the number of nonzeros on each processor */ 2789dc231df0SBarry Smith for (j=0; j<browners[i+1]-browners[i]; j++) { 279057b952d6SSatish Balay procsnz[i] += rowlengths[j]; 279157b952d6SSatish Balay } 2792dc231df0SBarry Smith ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 279357b952d6SSatish Balay } 2794606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2795dc231df0SBarry Smith } else { 2796dc231df0SBarry Smith ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2797dc231df0SBarry Smith } 279857b952d6SSatish Balay 2799dc231df0SBarry Smith if (!rank) { 280057b952d6SSatish Balay /* determine max buffer needed and allocate it */ 28018a8e0b3aSBarry Smith maxnz = procsnz[0]; 2802cdc0ba36SBarry Smith for (i=1; i<size; i++) { 280357b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 280457b952d6SSatish Balay } 2805b24ad042SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 280657b952d6SSatish Balay 280757b952d6SSatish Balay /* read in my part of the matrix column indices */ 280857b952d6SSatish Balay nz = procsnz[0]; 280919c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 281057b952d6SSatish Balay mycols = ibuf; 2811cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2812e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2813cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2814cee3aa6bSSatish Balay 281557b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 281657b952d6SSatish Balay for (i=1; i<size-1; i++) { 281757b952d6SSatish Balay nz = procsnz[i]; 2818e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2819b24ad042SBarry Smith ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 282057b952d6SSatish Balay } 282157b952d6SSatish Balay /* read in the stuff for the last proc */ 282257b952d6SSatish Balay if (size != 1) { 282357b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2824e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 282557b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2826b24ad042SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 282757b952d6SSatish Balay } 2828606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2829d64ed03dSBarry Smith } else { 283057b952d6SSatish Balay /* determine buffer space needed for message */ 283157b952d6SSatish Balay nz = 0; 283257b952d6SSatish Balay for (i=0; i<m; i++) { 283357b952d6SSatish Balay nz += locrowlens[i]; 283457b952d6SSatish Balay } 283519c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 283657b952d6SSatish Balay mycols = ibuf; 283757b952d6SSatish Balay /* receive message of column indices*/ 2838b24ad042SBarry Smith ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2839b24ad042SBarry Smith ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 284029bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 284157b952d6SSatish Balay } 284257b952d6SSatish Balay 284357b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2844dc231df0SBarry Smith ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2845dc231df0SBarry Smith ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2846dc231df0SBarry Smith ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2847dc231df0SBarry Smith ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2848dc231df0SBarry Smith ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 284957b952d6SSatish Balay rowcount = 0; nzcount = 0; 285057b952d6SSatish Balay for (i=0; i<mbs; i++) { 285157b952d6SSatish Balay dcount = 0; 285257b952d6SSatish Balay odcount = 0; 285357b952d6SSatish Balay for (j=0; j<bs; j++) { 285457b952d6SSatish Balay kmax = locrowlens[rowcount]; 285557b952d6SSatish Balay for (k=0; k<kmax; k++) { 285657b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 285757b952d6SSatish Balay if (!mask[tmp]) { 285857b952d6SSatish Balay mask[tmp] = 1; 285957b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 286057b952d6SSatish Balay else masked1[dcount++] = tmp; 286157b952d6SSatish Balay } 286257b952d6SSatish Balay } 286357b952d6SSatish Balay rowcount++; 286457b952d6SSatish Balay } 2865cee3aa6bSSatish Balay 286657b952d6SSatish Balay dlens[i] = dcount; 286757b952d6SSatish Balay odlens[i] = odcount; 2868cee3aa6bSSatish Balay 286957b952d6SSatish Balay /* zero out the mask elements we set */ 287057b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 287157b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 287257b952d6SSatish Balay } 2873cee3aa6bSSatish Balay 287457b952d6SSatish Balay /* create our matrix */ 2875f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2876f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 287778ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 287878ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 287978ae41b4SKris Buschelman 288078ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2881dc231df0SBarry Smith ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 288257b952d6SSatish Balay 288357b952d6SSatish Balay if (!rank) { 288419c38ff2SBarry Smith ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 288557b952d6SSatish Balay /* read in my part of the matrix numerical values */ 288657b952d6SSatish Balay nz = procsnz[0]; 288757b952d6SSatish Balay vals = buf; 2888cee3aa6bSSatish Balay mycols = ibuf; 2889cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2890e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2891cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2892537820f0SBarry Smith 289357b952d6SSatish Balay /* insert into matrix */ 289457b952d6SSatish Balay jj = rstart*bs; 289557b952d6SSatish Balay for (i=0; i<m; i++) { 2896dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 289757b952d6SSatish Balay mycols += locrowlens[i]; 289857b952d6SSatish Balay vals += locrowlens[i]; 289957b952d6SSatish Balay jj++; 290057b952d6SSatish Balay } 290157b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 290257b952d6SSatish Balay for (i=1; i<size-1; i++) { 290357b952d6SSatish Balay nz = procsnz[i]; 290457b952d6SSatish Balay vals = buf; 2905e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2906ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 290757b952d6SSatish Balay } 290857b952d6SSatish Balay /* the last proc */ 290957b952d6SSatish Balay if (size != 1){ 291057b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2911cee3aa6bSSatish Balay vals = buf; 2912e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 291357b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2914ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 291557b952d6SSatish Balay } 2916606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2917d64ed03dSBarry Smith } else { 291857b952d6SSatish Balay /* receive numeric values */ 291919c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 292057b952d6SSatish Balay 292157b952d6SSatish Balay /* receive message of values*/ 292257b952d6SSatish Balay vals = buf; 2923cee3aa6bSSatish Balay mycols = ibuf; 2924ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2925ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 292629bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 292757b952d6SSatish Balay 292857b952d6SSatish Balay /* insert into matrix */ 292957b952d6SSatish Balay jj = rstart*bs; 2930cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2931dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 293257b952d6SSatish Balay mycols += locrowlens[i]; 293357b952d6SSatish Balay vals += locrowlens[i]; 293457b952d6SSatish Balay jj++; 293557b952d6SSatish Balay } 293657b952d6SSatish Balay } 2937606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2938606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2939606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2940dc231df0SBarry Smith ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2941dc231df0SBarry Smith ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2942dc231df0SBarry Smith ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 29436d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29446d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 294578ae41b4SKris Buschelman 294678ae41b4SKris Buschelman *newmat = A; 29473a40ed3dSBarry Smith PetscFunctionReturn(0); 294857b952d6SSatish Balay } 294957b952d6SSatish Balay 29504a2ae208SSatish Balay #undef __FUNCT__ 29514a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2952133cdb44SSatish Balay /*@ 2953133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2954133cdb44SSatish Balay 2955133cdb44SSatish Balay Input Parameters: 2956133cdb44SSatish Balay . mat - the matrix 2957133cdb44SSatish Balay . fact - factor 2958133cdb44SSatish Balay 2959fee21e36SBarry Smith Collective on Mat 2960fee21e36SBarry Smith 29618c890885SBarry Smith Level: advanced 29628c890885SBarry Smith 2963133cdb44SSatish Balay Notes: 2964133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2965133cdb44SSatish Balay 2966133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2967133cdb44SSatish Balay 2968133cdb44SSatish Balay .seealso: MatSetOption() 2969133cdb44SSatish Balay @*/ 2970be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2971133cdb44SSatish Balay { 2972dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscReal); 29735bf65638SKris Buschelman 29745bf65638SKris Buschelman PetscFunctionBegin; 29755bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 29765bf65638SKris Buschelman if (f) { 29775bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 29785bf65638SKris Buschelman } 29795bf65638SKris Buschelman PetscFunctionReturn(0); 29805bf65638SKris Buschelman } 29815bf65638SKris Buschelman 2982be1d678aSKris Buschelman EXTERN_C_BEGIN 29835bf65638SKris Buschelman #undef __FUNCT__ 29845bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2985be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 29865bf65638SKris Buschelman { 298725fdafccSSatish Balay Mat_MPIBAIJ *baij; 2988133cdb44SSatish Balay 2989133cdb44SSatish Balay PetscFunctionBegin; 2990133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2991133cdb44SSatish Balay baij->ht_fact = fact; 2992133cdb44SSatish Balay PetscFunctionReturn(0); 2993133cdb44SSatish Balay } 2994be1d678aSKris Buschelman EXTERN_C_END 2995f2a5309cSSatish Balay 29964a2ae208SSatish Balay #undef __FUNCT__ 29974a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2998be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2999f2a5309cSSatish Balay { 3000f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3001f2a5309cSSatish Balay PetscFunctionBegin; 3002f2a5309cSSatish Balay *Ad = a->A; 3003f2a5309cSSatish Balay *Ao = a->B; 3004195d93cdSBarry Smith *colmap = a->garray; 3005f2a5309cSSatish Balay PetscFunctionReturn(0); 3006f2a5309cSSatish Balay } 3007