1be1d678aSKris Buschelman #define PETSCMAT_DLL 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 479bdfe76SSatish Balay 5dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 15f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 1693fea6afSBarry Smith 1793fea6afSBarry Smith /* UGLY, ugly, ugly 1887828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 1993fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 2093fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2193fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2293fea6afSBarry Smith into the single precision data structures. 2393fea6afSBarry Smith */ 2493fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 3093fea6afSBarry Smith #else 316fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3293fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3393fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 346fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 356fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3693fea6afSBarry Smith #endif 373b2fbd54SBarry Smith 384a2ae208SSatish Balay #undef __FUNCT__ 394a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 417843d17aSBarry Smith { 427843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43dfbe8321SBarry Smith PetscErrorCode ierr; 44b24ad042SBarry Smith PetscInt i; 4587828ca2SBarry Smith PetscScalar *va,*vb; 467843d17aSBarry Smith Vec vtmp; 477843d17aSBarry Smith 487843d17aSBarry Smith PetscFunctionBegin; 497843d17aSBarry Smith 507843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 511ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 527843d17aSBarry Smith 53ac355199SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 547843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 551ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 567843d17aSBarry Smith 57273d9f13SBarry Smith for (i=0; i<A->m; i++){ 58273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 597843d17aSBarry Smith } 607843d17aSBarry Smith 611ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 621ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 647843d17aSBarry Smith 657843d17aSBarry Smith PetscFunctionReturn(0); 667843d17aSBarry Smith } 677843d17aSBarry Smith 687fc3c18eSBarry Smith EXTERN_C_BEGIN 694a2ae208SSatish Balay #undef __FUNCT__ 704a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 727fc3c18eSBarry Smith { 737fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74dfbe8321SBarry Smith PetscErrorCode ierr; 757fc3c18eSBarry Smith 767fc3c18eSBarry Smith PetscFunctionBegin; 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 787fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 797fc3c18eSBarry Smith PetscFunctionReturn(0); 807fc3c18eSBarry Smith } 817fc3c18eSBarry Smith EXTERN_C_END 827fc3c18eSBarry Smith 837fc3c18eSBarry Smith EXTERN_C_BEGIN 844a2ae208SSatish Balay #undef __FUNCT__ 854a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 877fc3c18eSBarry Smith { 887fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89dfbe8321SBarry Smith PetscErrorCode ierr; 907fc3c18eSBarry Smith 917fc3c18eSBarry Smith PetscFunctionBegin; 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 937fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 947fc3c18eSBarry Smith PetscFunctionReturn(0); 957fc3c18eSBarry Smith } 967fc3c18eSBarry Smith EXTERN_C_END 977fc3c18eSBarry Smith 98537820f0SBarry Smith /* 99537820f0SBarry Smith Local utility routine that creates a mapping from the global column 10057b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10157b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10257b952d6SSatish Balay length of colmap equals the global matrix length. 10357b952d6SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 10757b952d6SSatish Balay { 10857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10957b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 1106849ba73SBarry Smith PetscErrorCode ierr; 111521d7252SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->bs; 11257b952d6SSatish Balay 113d64ed03dSBarry Smith PetscFunctionBegin; 114aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 115f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11648e59246SSatish Balay for (i=0; i<nbs; i++){ 1170f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11848e59246SSatish Balay } 11948e59246SSatish Balay #else 120b24ad042SBarry Smith ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 12152e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122b24ad042SBarry Smith ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12448e59246SSatish Balay #endif 1253a40ed3dSBarry Smith PetscFunctionReturn(0); 12657b952d6SSatish Balay } 12757b952d6SSatish Balay 12880c1aa95SSatish Balay #define CHUNKSIZE 10 12980c1aa95SSatish Balay 130f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13180c1aa95SSatish Balay { \ 13280c1aa95SSatish Balay \ 13380c1aa95SSatish Balay brow = row/bs; \ 13480c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13680c1aa95SSatish Balay bcol = col/bs; \ 13780c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 138ab26458aSBarry Smith low = 0; high = nrow; \ 139ab26458aSBarry Smith while (high-low > 3) { \ 140ab26458aSBarry Smith t = (low+high)/2; \ 141ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 142ab26458aSBarry Smith else low = t; \ 143ab26458aSBarry Smith } \ 144ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14580c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14680c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14780c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 148eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 149eada6651SSatish Balay else *bap = value; \ 150ac7a638eSSatish Balay goto a_noinsert; \ 15180c1aa95SSatish Balay } \ 15280c1aa95SSatish Balay } \ 15389280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 154085a36d4SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155ed1caa07SMatthew Knepley MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 15680c1aa95SSatish Balay N = nrow++ - 1; \ 15780c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 15880c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 15980c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1603eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16180c1aa95SSatish Balay } \ 1623eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 16380c1aa95SSatish Balay rp[_i] = bcol; \ 16480c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 165ac7a638eSSatish Balay a_noinsert:; \ 16680c1aa95SSatish Balay ailen[brow] = nrow; \ 16780c1aa95SSatish Balay } 16857b952d6SSatish Balay 169ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 170ac7a638eSSatish Balay { \ 171ac7a638eSSatish Balay brow = row/bs; \ 172ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 174ac7a638eSSatish Balay bcol = col/bs; \ 175ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 176ac7a638eSSatish Balay low = 0; high = nrow; \ 177ac7a638eSSatish Balay while (high-low > 3) { \ 178ac7a638eSSatish Balay t = (low+high)/2; \ 179ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 180ac7a638eSSatish Balay else low = t; \ 181ac7a638eSSatish Balay } \ 182ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 183ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 184ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 185ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 186ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 187ac7a638eSSatish Balay else *bap = value; \ 188ac7a638eSSatish Balay goto b_noinsert; \ 189ac7a638eSSatish Balay } \ 190ac7a638eSSatish Balay } \ 19189280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 192085a36d4SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 193ed1caa07SMatthew Knepley MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 194085a36d4SBarry Smith CHKMEMQ;\ 195ac7a638eSSatish Balay N = nrow++ - 1; \ 196ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 197ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 198ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 1993eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 200ac7a638eSSatish Balay } \ 2013eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 202ac7a638eSSatish Balay rp[_i] = bcol; \ 203ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 204ac7a638eSSatish Balay b_noinsert:; \ 205ac7a638eSSatish Balay bilen[brow] = nrow; \ 206ac7a638eSSatish Balay } 207ac7a638eSSatish Balay 20893fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2094a2ae208SSatish Balay #undef __FUNCT__ 2104a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 211b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 21257b952d6SSatish Balay { 2136fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 214dfbe8321SBarry Smith PetscErrorCode ierr; 215b24ad042SBarry Smith PetscInt i,N = m*n; 2166fa18ffdSBarry Smith MatScalar *vsingle; 21793fea6afSBarry Smith 21893fea6afSBarry Smith PetscFunctionBegin; 2196fa18ffdSBarry Smith if (N > b->setvalueslen) { 2206fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 22182502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2226fa18ffdSBarry Smith b->setvalueslen = N; 2236fa18ffdSBarry Smith } 2246fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2256fa18ffdSBarry Smith 22693fea6afSBarry Smith for (i=0; i<N; i++) { 22793fea6afSBarry Smith vsingle[i] = v[i]; 22893fea6afSBarry Smith } 22993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 23093fea6afSBarry Smith PetscFunctionReturn(0); 23193fea6afSBarry Smith } 23293fea6afSBarry Smith 2334a2ae208SSatish Balay #undef __FUNCT__ 2344a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 235b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 23693fea6afSBarry Smith { 2376fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 238dfbe8321SBarry Smith PetscErrorCode ierr; 239b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2406fa18ffdSBarry Smith MatScalar *vsingle; 24193fea6afSBarry Smith 24293fea6afSBarry Smith PetscFunctionBegin; 2436fa18ffdSBarry Smith if (N > b->setvalueslen) { 2446fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 24582502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2466fa18ffdSBarry Smith b->setvalueslen = N; 2476fa18ffdSBarry Smith } 2486fa18ffdSBarry Smith vsingle = b->setvaluescopy; 24993fea6afSBarry Smith for (i=0; i<N; i++) { 25093fea6afSBarry Smith vsingle[i] = v[i]; 25193fea6afSBarry Smith } 25293fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 25393fea6afSBarry Smith PetscFunctionReturn(0); 25493fea6afSBarry Smith } 2556fa18ffdSBarry Smith 2564a2ae208SSatish Balay #undef __FUNCT__ 2574a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 258b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2596fa18ffdSBarry Smith { 2606fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261dfbe8321SBarry Smith PetscErrorCode ierr; 262b24ad042SBarry Smith PetscInt i,N = m*n; 2636fa18ffdSBarry Smith MatScalar *vsingle; 2646fa18ffdSBarry Smith 2656fa18ffdSBarry Smith PetscFunctionBegin; 2666fa18ffdSBarry Smith if (N > b->setvalueslen) { 2676fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 26882502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2696fa18ffdSBarry Smith b->setvalueslen = N; 2706fa18ffdSBarry Smith } 2716fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2726fa18ffdSBarry Smith for (i=0; i<N; i++) { 2736fa18ffdSBarry Smith vsingle[i] = v[i]; 2746fa18ffdSBarry Smith } 2756fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 2766fa18ffdSBarry Smith PetscFunctionReturn(0); 2776fa18ffdSBarry Smith } 2786fa18ffdSBarry Smith 2794a2ae208SSatish Balay #undef __FUNCT__ 2804a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 281b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2826fa18ffdSBarry Smith { 2836fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 284dfbe8321SBarry Smith PetscErrorCode ierr; 285b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2866fa18ffdSBarry Smith MatScalar *vsingle; 2876fa18ffdSBarry Smith 2886fa18ffdSBarry Smith PetscFunctionBegin; 2896fa18ffdSBarry Smith if (N > b->setvalueslen) { 2906fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 29182502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2926fa18ffdSBarry Smith b->setvalueslen = N; 2936fa18ffdSBarry Smith } 2946fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2956fa18ffdSBarry Smith for (i=0; i<N; i++) { 2966fa18ffdSBarry Smith vsingle[i] = v[i]; 2976fa18ffdSBarry Smith } 2986fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 2996fa18ffdSBarry Smith PetscFunctionReturn(0); 3006fa18ffdSBarry Smith } 30193fea6afSBarry Smith #endif 30293fea6afSBarry Smith 3034a2ae208SSatish Balay #undef __FUNCT__ 304e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 305b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 30693fea6afSBarry Smith { 30757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 30893fea6afSBarry Smith MatScalar value; 309273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 310dfbe8321SBarry Smith PetscErrorCode ierr; 311b24ad042SBarry Smith PetscInt i,j,row,col; 312b24ad042SBarry Smith PetscInt rstart_orig=baij->rstart_bs; 313b24ad042SBarry Smith PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 314521d7252SBarry Smith PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 31557b952d6SSatish Balay 316eada6651SSatish Balay /* Some Variables required in the macro */ 31780c1aa95SSatish Balay Mat A = baij->A; 31880c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 319b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3203eda8832SBarry Smith MatScalar *aa=a->a; 321ac7a638eSSatish Balay 322ac7a638eSSatish Balay Mat B = baij->B; 323ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 324b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3253eda8832SBarry Smith MatScalar *ba=b->a; 326ac7a638eSSatish Balay 327b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 328b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 3293eda8832SBarry Smith MatScalar *ap,*bap; 33080c1aa95SSatish Balay 331d64ed03dSBarry Smith PetscFunctionBegin; 33257b952d6SSatish Balay for (i=0; i<m; i++) { 3335ef9f2a5SBarry Smith if (im[i] < 0) continue; 3342515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 33577431f27SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 336639f9d9dSBarry Smith #endif 33757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 33857b952d6SSatish Balay row = im[i] - rstart_orig; 33957b952d6SSatish Balay for (j=0; j<n; j++) { 34057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 34157b952d6SSatish Balay col = in[j] - cstart_orig; 34257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 343f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 34480c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 34573959e64SBarry Smith } else if (in[j] < 0) continue; 3462515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 34777431f27SBarry Smith else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 348639f9d9dSBarry Smith #endif 34957b952d6SSatish Balay else { 35057b952d6SSatish Balay if (mat->was_assembled) { 351905e6a2fSBarry Smith if (!baij->colmap) { 352905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 353905e6a2fSBarry Smith } 354aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3550f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 356bba1ac68SSatish Balay col = col - 1; 35748e59246SSatish Balay #else 358bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 35948e59246SSatish Balay #endif 36057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 36157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3628295de27SSatish Balay col = in[j]; 3639bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 3649bf004c3SSatish Balay B = baij->B; 3659bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 3669bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 3679bf004c3SSatish Balay ba=b->a; 368bba1ac68SSatish Balay } else col += in[j]%bs; 3698295de27SSatish Balay } else col = in[j]; 37057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 37190da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 37290da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 37357b952d6SSatish Balay } 37457b952d6SSatish Balay } 375d64ed03dSBarry Smith } else { 37690f02eecSBarry Smith if (!baij->donotstash) { 377ff2fd236SBarry Smith if (roworiented) { 3786fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 379ff2fd236SBarry Smith } else { 3806fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 38157b952d6SSatish Balay } 38257b952d6SSatish Balay } 38357b952d6SSatish Balay } 38490f02eecSBarry Smith } 3853a40ed3dSBarry Smith PetscFunctionReturn(0); 38657b952d6SSatish Balay } 38757b952d6SSatish Balay 3884a2ae208SSatish Balay #undef __FUNCT__ 3894a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 390b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 391ab26458aSBarry Smith { 392ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 393f15d580aSBarry Smith const MatScalar *value; 394f15d580aSBarry Smith MatScalar *barray=baij->barray; 395273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 396dfbe8321SBarry Smith PetscErrorCode ierr; 397b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 398b24ad042SBarry Smith PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 399521d7252SBarry Smith PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 400ab26458aSBarry Smith 401b16ae2b1SBarry Smith PetscFunctionBegin; 40230793edcSSatish Balay if(!barray) { 40382502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 40482502324SSatish Balay baij->barray = barray; 40530793edcSSatish Balay } 40630793edcSSatish Balay 407ab26458aSBarry Smith if (roworiented) { 408ab26458aSBarry Smith stepval = (n-1)*bs; 409ab26458aSBarry Smith } else { 410ab26458aSBarry Smith stepval = (m-1)*bs; 411ab26458aSBarry Smith } 412ab26458aSBarry Smith for (i=0; i<m; i++) { 4135ef9f2a5SBarry Smith if (im[i] < 0) continue; 4142515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 41577431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 416ab26458aSBarry Smith #endif 417ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 418ab26458aSBarry Smith row = im[i] - rstart; 419ab26458aSBarry Smith for (j=0; j<n; j++) { 42015b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 42115b57d14SSatish Balay if ((roworiented) && (n == 1)) { 422f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 42315b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 424f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 42515b57d14SSatish Balay } else { /* Here a copy is required */ 426ab26458aSBarry Smith if (roworiented) { 427ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 428ab26458aSBarry Smith } else { 429ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 430abef11f7SSatish Balay } 43147513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 43247513183SBarry Smith for (jj=0; jj<bs; jj++) { 43330793edcSSatish Balay *barray++ = *value++; 43447513183SBarry Smith } 43547513183SBarry Smith } 43630793edcSSatish Balay barray -=bs2; 43715b57d14SSatish Balay } 438abef11f7SSatish Balay 439abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 440abef11f7SSatish Balay col = in[j] - cstart; 44193fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 442ab26458aSBarry Smith } 4435ef9f2a5SBarry Smith else if (in[j] < 0) continue; 4442515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 44577431f27SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 446ab26458aSBarry Smith #endif 447ab26458aSBarry Smith else { 448ab26458aSBarry Smith if (mat->was_assembled) { 449ab26458aSBarry Smith if (!baij->colmap) { 450ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 451ab26458aSBarry Smith } 452a5eb4965SSatish Balay 4532515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 454aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 455b24ad042SBarry Smith { PetscInt data; 4560f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 45729bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 458fa46199cSSatish Balay } 45948e59246SSatish Balay #else 46029bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461a5eb4965SSatish Balay #endif 46248e59246SSatish Balay #endif 463aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4640f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 465fa46199cSSatish Balay col = (col - 1)/bs; 46648e59246SSatish Balay #else 467a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 46848e59246SSatish Balay #endif 469ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 470ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 471ab26458aSBarry Smith col = in[j]; 472ab26458aSBarry Smith } 473ab26458aSBarry Smith } 474ab26458aSBarry Smith else col = in[j]; 47593fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 476ab26458aSBarry Smith } 477ab26458aSBarry Smith } 478d64ed03dSBarry Smith } else { 479ab26458aSBarry Smith if (!baij->donotstash) { 480ff2fd236SBarry Smith if (roworiented) { 4816fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 482ff2fd236SBarry Smith } else { 4836fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 484ff2fd236SBarry Smith } 485abef11f7SSatish Balay } 486ab26458aSBarry Smith } 487ab26458aSBarry Smith } 4883a40ed3dSBarry Smith PetscFunctionReturn(0); 489ab26458aSBarry Smith } 4906fa18ffdSBarry Smith 4910bdbc534SSatish Balay #define HASH_KEY 0.6180339887 492b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 493b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 494b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 4954a2ae208SSatish Balay #undef __FUNCT__ 4964a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 497b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 4980bdbc534SSatish Balay { 4990bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 500273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 501dfbe8321SBarry Smith PetscErrorCode ierr; 502b24ad042SBarry Smith PetscInt i,j,row,col; 503b24ad042SBarry Smith PetscInt rstart_orig=baij->rstart_bs; 504b24ad042SBarry Smith PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 505521d7252SBarry Smith PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 506329f5518SBarry Smith PetscReal tmp; 5073eda8832SBarry Smith MatScalar **HD = baij->hd,value; 5082515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 509b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5104a15367fSSatish Balay #endif 5110bdbc534SSatish Balay 5120bdbc534SSatish Balay PetscFunctionBegin; 5130bdbc534SSatish Balay 5140bdbc534SSatish Balay for (i=0; i<m; i++) { 5152515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 51629bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 51777431f27SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 5180bdbc534SSatish Balay #endif 5190bdbc534SSatish Balay row = im[i]; 520c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5210bdbc534SSatish Balay for (j=0; j<n; j++) { 5220bdbc534SSatish Balay col = in[j]; 5236fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 524b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 525c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 526c2760754SSatish Balay h1 = HASH(size,key,tmp); 5270bdbc534SSatish Balay 528c2760754SSatish Balay 529c2760754SSatish Balay idx = h1; 5302515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 531187ce0cbSSatish Balay insert_ct++; 532187ce0cbSSatish Balay total_ct++; 533187ce0cbSSatish Balay if (HT[idx] != key) { 534187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 535187ce0cbSSatish Balay if (idx == size) { 536187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 537187ce0cbSSatish Balay if (idx == h1) { 53877431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 539187ce0cbSSatish Balay } 540187ce0cbSSatish Balay } 541187ce0cbSSatish Balay } 542187ce0cbSSatish Balay #else 543c2760754SSatish Balay if (HT[idx] != key) { 544c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 545c2760754SSatish Balay if (idx == size) { 546c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 547c2760754SSatish Balay if (idx == h1) { 54877431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 549c2760754SSatish Balay } 550c2760754SSatish Balay } 551c2760754SSatish Balay } 552187ce0cbSSatish Balay #endif 553c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 554c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 555c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5560bdbc534SSatish Balay } 5570bdbc534SSatish Balay } else { 5580bdbc534SSatish Balay if (!baij->donotstash) { 559ff2fd236SBarry Smith if (roworiented) { 5608798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 561ff2fd236SBarry Smith } else { 5628798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5630bdbc534SSatish Balay } 5640bdbc534SSatish Balay } 5650bdbc534SSatish Balay } 5660bdbc534SSatish Balay } 5672515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 568187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 569187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 570187ce0cbSSatish Balay #endif 5710bdbc534SSatish Balay PetscFunctionReturn(0); 5720bdbc534SSatish Balay } 5730bdbc534SSatish Balay 5744a2ae208SSatish Balay #undef __FUNCT__ 5754a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 576b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 5770bdbc534SSatish Balay { 5780bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 579273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 580dfbe8321SBarry Smith PetscErrorCode ierr; 581b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col; 582b24ad042SBarry Smith PetscInt rstart=baij->rstart ; 583521d7252SBarry Smith PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 584b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 585329f5518SBarry Smith PetscReal tmp; 5863eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 587f15d580aSBarry Smith const MatScalar *v_t,*value; 5882515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 589b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5904a15367fSSatish Balay #endif 5910bdbc534SSatish Balay 592d0a41580SSatish Balay PetscFunctionBegin; 593d0a41580SSatish Balay 5940bdbc534SSatish Balay if (roworiented) { 5950bdbc534SSatish Balay stepval = (n-1)*bs; 5960bdbc534SSatish Balay } else { 5970bdbc534SSatish Balay stepval = (m-1)*bs; 5980bdbc534SSatish Balay } 5990bdbc534SSatish Balay for (i=0; i<m; i++) { 6002515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 60177431f27SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 60277431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 6030bdbc534SSatish Balay #endif 6040bdbc534SSatish Balay row = im[i]; 605187ce0cbSSatish Balay v_t = v + i*bs2; 606c2760754SSatish Balay if (row >= rstart && row < rend) { 6070bdbc534SSatish Balay for (j=0; j<n; j++) { 6080bdbc534SSatish Balay col = in[j]; 6090bdbc534SSatish Balay 6100bdbc534SSatish Balay /* Look up into the Hash Table */ 611c2760754SSatish Balay key = row*Nbs+col+1; 612c2760754SSatish Balay h1 = HASH(size,key,tmp); 6130bdbc534SSatish Balay 614c2760754SSatish Balay idx = h1; 6152515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 616187ce0cbSSatish Balay total_ct++; 617187ce0cbSSatish Balay insert_ct++; 618187ce0cbSSatish Balay if (HT[idx] != key) { 619187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 620187ce0cbSSatish Balay if (idx == size) { 621187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 622187ce0cbSSatish Balay if (idx == h1) { 62377431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 624187ce0cbSSatish Balay } 625187ce0cbSSatish Balay } 626187ce0cbSSatish Balay } 627187ce0cbSSatish Balay #else 628c2760754SSatish Balay if (HT[idx] != key) { 629c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 630c2760754SSatish Balay if (idx == size) { 631c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 632c2760754SSatish Balay if (idx == h1) { 63377431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 634c2760754SSatish Balay } 635c2760754SSatish Balay } 636c2760754SSatish Balay } 637187ce0cbSSatish Balay #endif 638c2760754SSatish Balay baij_a = HD[idx]; 6390bdbc534SSatish Balay if (roworiented) { 640c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 641187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 642187ce0cbSSatish Balay value = v_t; 643187ce0cbSSatish Balay v_t += bs; 644fef45726SSatish Balay if (addv == ADD_VALUES) { 645c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 646c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 647fef45726SSatish Balay baij_a[jj] += *value++; 648b4cc0f5aSSatish Balay } 649b4cc0f5aSSatish Balay } 650fef45726SSatish Balay } else { 651c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 652c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 653fef45726SSatish Balay baij_a[jj] = *value++; 654fef45726SSatish Balay } 655fef45726SSatish Balay } 656fef45726SSatish Balay } 6570bdbc534SSatish Balay } else { 6580bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 659fef45726SSatish Balay if (addv == ADD_VALUES) { 660b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6610bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 662fef45726SSatish Balay baij_a[jj] += *value++; 663fef45726SSatish Balay } 664fef45726SSatish Balay } 665fef45726SSatish Balay } else { 666fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 667fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 668fef45726SSatish Balay baij_a[jj] = *value++; 669fef45726SSatish Balay } 670b4cc0f5aSSatish Balay } 6710bdbc534SSatish Balay } 6720bdbc534SSatish Balay } 6730bdbc534SSatish Balay } 6740bdbc534SSatish Balay } else { 6750bdbc534SSatish Balay if (!baij->donotstash) { 6760bdbc534SSatish Balay if (roworiented) { 6778798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6780bdbc534SSatish Balay } else { 6798798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6800bdbc534SSatish Balay } 6810bdbc534SSatish Balay } 6820bdbc534SSatish Balay } 6830bdbc534SSatish Balay } 6842515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 685187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 686187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 687187ce0cbSSatish Balay #endif 6880bdbc534SSatish Balay PetscFunctionReturn(0); 6890bdbc534SSatish Balay } 690133cdb44SSatish Balay 6914a2ae208SSatish Balay #undef __FUNCT__ 6924a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 693b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 694d6de1c52SSatish Balay { 695d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6966849ba73SBarry Smith PetscErrorCode ierr; 697521d7252SBarry Smith PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 698b24ad042SBarry Smith PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 699d6de1c52SSatish Balay 700133cdb44SSatish Balay PetscFunctionBegin; 701d6de1c52SSatish Balay for (i=0; i<m; i++) { 70277431f27SBarry Smith if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 70377431f27SBarry Smith if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 704d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 705d6de1c52SSatish Balay row = idxm[i] - bsrstart; 706d6de1c52SSatish Balay for (j=0; j<n; j++) { 70777431f27SBarry Smith if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 70877431f27SBarry Smith if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 709d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 710d6de1c52SSatish Balay col = idxn[j] - bscstart; 71198dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 712d64ed03dSBarry Smith } else { 713905e6a2fSBarry Smith if (!baij->colmap) { 714905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 715905e6a2fSBarry Smith } 716aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7170f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 718fa46199cSSatish Balay data --; 71948e59246SSatish Balay #else 72048e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 72148e59246SSatish Balay #endif 72248e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 723d9d09a02SSatish Balay else { 72448e59246SSatish Balay col = data + idxn[j]%bs; 72598dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 726d6de1c52SSatish Balay } 727d6de1c52SSatish Balay } 728d6de1c52SSatish Balay } 729d64ed03dSBarry Smith } else { 73029bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 731d6de1c52SSatish Balay } 732d6de1c52SSatish Balay } 7333a40ed3dSBarry Smith PetscFunctionReturn(0); 734d6de1c52SSatish Balay } 735d6de1c52SSatish Balay 7364a2ae208SSatish Balay #undef __FUNCT__ 7374a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 738dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 739d6de1c52SSatish Balay { 740d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 741d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 742dfbe8321SBarry Smith PetscErrorCode ierr; 7438a62d963SHong Zhang PetscInt i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col; 744329f5518SBarry Smith PetscReal sum = 0.0; 7453eda8832SBarry Smith MatScalar *v; 746d6de1c52SSatish Balay 747d64ed03dSBarry Smith PetscFunctionBegin; 748d6de1c52SSatish Balay if (baij->size == 1) { 749064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 750d6de1c52SSatish Balay } else { 751d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 752d6de1c52SSatish Balay v = amat->a; 7538a62d963SHong Zhang nz = amat->nz*bs2; 7548a62d963SHong Zhang for (i=0; i<nz; i++) { 755aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 756329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 757d6de1c52SSatish Balay #else 758d6de1c52SSatish Balay sum += (*v)*(*v); v++; 759d6de1c52SSatish Balay #endif 760d6de1c52SSatish Balay } 761d6de1c52SSatish Balay v = bmat->a; 7628a62d963SHong Zhang nz = bmat->nz*bs2; 7638a62d963SHong Zhang for (i=0; i<nz; i++) { 764aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 765329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 766d6de1c52SSatish Balay #else 767d6de1c52SSatish Balay sum += (*v)*(*v); v++; 768d6de1c52SSatish Balay #endif 769d6de1c52SSatish Balay } 770064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 771064f8208SBarry Smith *nrm = sqrt(*nrm); 7728a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 7738a62d963SHong Zhang PetscReal *tmp,*tmp2; 7748a62d963SHong Zhang PetscInt *jj,*garray=baij->garray,cstart=baij->cstart; 7758a62d963SHong Zhang ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 7768a62d963SHong Zhang tmp2 = tmp + mat->N; 7778a62d963SHong Zhang ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 7788a62d963SHong Zhang v = amat->a; jj = amat->j; 7798a62d963SHong Zhang for (i=0; i<amat->nz; i++) { 7808a62d963SHong Zhang for (j=0; j<bs; j++){ 7818a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 7828a62d963SHong Zhang for (row=0; row<bs; row++){ 7838a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7848a62d963SHong Zhang } 7858a62d963SHong Zhang } 7868a62d963SHong Zhang jj++; 7878a62d963SHong Zhang } 7888a62d963SHong Zhang v = bmat->a; jj = bmat->j; 7898a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 7908a62d963SHong Zhang for (j=0; j<bs; j++){ 7918a62d963SHong Zhang col = bs*garray[*jj] + j; 7928a62d963SHong Zhang for (row=0; row<bs; row++){ 7938a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7948a62d963SHong Zhang } 7958a62d963SHong Zhang } 7968a62d963SHong Zhang jj++; 7978a62d963SHong Zhang } 7988a62d963SHong Zhang ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 7998a62d963SHong Zhang *nrm = 0.0; 8008a62d963SHong Zhang for (j=0; j<mat->N; j++) { 8018a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8028a62d963SHong Zhang } 8038a62d963SHong Zhang ierr = PetscFree(tmp);CHKERRQ(ierr); 8048a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 805577dd1f9SKris Buschelman PetscReal *sums; 806577dd1f9SKris Buschelman ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 8078a62d963SHong Zhang sum = 0.0; 8088a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 8098a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 8108a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 8118a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 8128a62d963SHong Zhang for (i=0; i<nz; i++) { 8138a62d963SHong Zhang for (col=0; col<bs; col++){ 8148a62d963SHong Zhang for (row=0; row<bs; row++){ 8158a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8168a62d963SHong Zhang } 8178a62d963SHong Zhang } 8188a62d963SHong Zhang } 8198a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 8208a62d963SHong Zhang nz = bmat->i[j+1]-bmat->i[j]; 8218a62d963SHong Zhang for (i=0; i<nz; i++) { 8228a62d963SHong Zhang for (col=0; col<bs; col++){ 8238a62d963SHong Zhang for (row=0; row<bs; row++){ 8248a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8258a62d963SHong Zhang } 8268a62d963SHong Zhang } 8278a62d963SHong Zhang } 8288a62d963SHong Zhang for (row=0; row<bs; row++){ 8298a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 8308a62d963SHong Zhang } 8318a62d963SHong Zhang } 8328a62d963SHong Zhang ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 833577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 834d64ed03dSBarry Smith } else { 83529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836d6de1c52SSatish Balay } 837d64ed03dSBarry Smith } 8383a40ed3dSBarry Smith PetscFunctionReturn(0); 839d6de1c52SSatish Balay } 84057b952d6SSatish Balay 841fef45726SSatish Balay /* 842fef45726SSatish Balay Creates the hash table, and sets the table 843fef45726SSatish Balay This table is created only once. 844fef45726SSatish Balay If new entried need to be added to the matrix 845fef45726SSatish Balay then the hash table has to be destroyed and 846fef45726SSatish Balay recreated. 847fef45726SSatish Balay */ 8484a2ae208SSatish Balay #undef __FUNCT__ 8494a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851596b8d2eSBarry Smith { 852596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 854596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8566849ba73SBarry Smith PetscErrorCode ierr; 857b24ad042SBarry Smith PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 858b24ad042SBarry Smith PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859b24ad042SBarry Smith PetscInt *HT,key; 8603eda8832SBarry Smith MatScalar **HD; 861329f5518SBarry Smith PetscReal tmp; 8622515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 863b24ad042SBarry Smith PetscInt ct=0,max=0; 8644a15367fSSatish Balay #endif 865fef45726SSatish Balay 866d64ed03dSBarry Smith PetscFunctionBegin; 867b24ad042SBarry Smith baij->ht_size=(PetscInt)(factor*nz); 8680bdbc534SSatish Balay size = baij->ht_size; 869fef45726SSatish Balay 8700bdbc534SSatish Balay if (baij->ht) { 8710bdbc534SSatish Balay PetscFunctionReturn(0); 872596b8d2eSBarry Smith } 8730bdbc534SSatish Balay 874fef45726SSatish Balay /* Allocate Memory for Hash Table */ 875b24ad042SBarry Smith ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876b24ad042SBarry Smith baij->ht = (PetscInt*)(baij->hd + size); 877b9e4cc15SSatish Balay HD = baij->hd; 878a07cd24cSSatish Balay HT = baij->ht; 879b9e4cc15SSatish Balay 880b9e4cc15SSatish Balay 881b24ad042SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8820bdbc534SSatish Balay 883596b8d2eSBarry Smith 884596b8d2eSBarry Smith /* Loop Over A */ 8850bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 886596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8870bdbc534SSatish Balay row = i+rstart; 8880bdbc534SSatish Balay col = aj[j]+cstart; 889596b8d2eSBarry Smith 890187ce0cbSSatish Balay key = row*Nbs + col + 1; 891c2760754SSatish Balay h1 = HASH(size,key,tmp); 8920bdbc534SSatish Balay for (k=0; k<size; k++){ 893958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 8940bdbc534SSatish Balay HT[(h1+k)%size] = key; 8950bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 896596b8d2eSBarry Smith break; 8972515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 898187ce0cbSSatish Balay } else { 899187ce0cbSSatish Balay ct++; 900187ce0cbSSatish Balay #endif 901596b8d2eSBarry Smith } 902187ce0cbSSatish Balay } 9032515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 904187ce0cbSSatish Balay if (k> max) max = k; 905187ce0cbSSatish Balay #endif 906596b8d2eSBarry Smith } 907596b8d2eSBarry Smith } 908596b8d2eSBarry Smith /* Loop Over B */ 9090bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 910596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9110bdbc534SSatish Balay row = i+rstart; 9120bdbc534SSatish Balay col = garray[bj[j]]; 913187ce0cbSSatish Balay key = row*Nbs + col + 1; 914c2760754SSatish Balay h1 = HASH(size,key,tmp); 9150bdbc534SSatish Balay for (k=0; k<size; k++){ 916958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9170bdbc534SSatish Balay HT[(h1+k)%size] = key; 9180bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 919596b8d2eSBarry Smith break; 9202515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 921187ce0cbSSatish Balay } else { 922187ce0cbSSatish Balay ct++; 923187ce0cbSSatish Balay #endif 924596b8d2eSBarry Smith } 925187ce0cbSSatish Balay } 9262515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 927187ce0cbSSatish Balay if (k> max) max = k; 928187ce0cbSSatish Balay #endif 929596b8d2eSBarry Smith } 930596b8d2eSBarry Smith } 931596b8d2eSBarry Smith 932596b8d2eSBarry Smith /* Print Summary */ 9332515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 934c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 935596b8d2eSBarry Smith if (HT[i]) {j++;} 936c38d4ed2SBarry Smith } 93763ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr); 938187ce0cbSSatish Balay #endif 9393a40ed3dSBarry Smith PetscFunctionReturn(0); 940596b8d2eSBarry Smith } 94157b952d6SSatish Balay 9424a2ae208SSatish Balay #undef __FUNCT__ 9434a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945bbb85fb3SSatish Balay { 946bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947dfbe8321SBarry Smith PetscErrorCode ierr; 948b24ad042SBarry Smith PetscInt nstash,reallocs; 949bbb85fb3SSatish Balay InsertMode addv; 950bbb85fb3SSatish Balay 951bbb85fb3SSatish Balay PetscFunctionBegin; 952bbb85fb3SSatish Balay if (baij->donotstash) { 953bbb85fb3SSatish Balay PetscFunctionReturn(0); 954bbb85fb3SSatish Balay } 955bbb85fb3SSatish Balay 956bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 957bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 958bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 95929bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 960bbb85fb3SSatish Balay } 961bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 962bbb85fb3SSatish Balay 9638798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9648798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9658798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 96663ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 96746680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 96863ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 969bbb85fb3SSatish Balay PetscFunctionReturn(0); 970bbb85fb3SSatish Balay } 971bbb85fb3SSatish Balay 9724a2ae208SSatish Balay #undef __FUNCT__ 9734a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 974dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 975bbb85fb3SSatish Balay { 976bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 977a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 9786849ba73SBarry Smith PetscErrorCode ierr; 979b24ad042SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 980b24ad042SBarry Smith PetscInt *row,*col,other_disassembled; 9817c922b88SBarry Smith PetscTruth r1,r2,r3; 9823eda8832SBarry Smith MatScalar *val; 983bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 984b24ad042SBarry Smith PetscMPIInt n; 985bbb85fb3SSatish Balay 986bbb85fb3SSatish Balay PetscFunctionBegin; 987bbb85fb3SSatish Balay if (!baij->donotstash) { 988a2d1c673SSatish Balay while (1) { 9898798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 990a2d1c673SSatish Balay if (!flg) break; 991a2d1c673SSatish Balay 992bbb85fb3SSatish Balay for (i=0; i<n;) { 993bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 994bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995bbb85fb3SSatish Balay if (j < n) ncols = j-i; 996bbb85fb3SSatish Balay else ncols = n-i; 997bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 99893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999bbb85fb3SSatish Balay i = j; 1000bbb85fb3SSatish Balay } 1001bbb85fb3SSatish Balay } 10028798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1004a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1005a2d1c673SSatish Balay restore the original flags */ 1006a2d1c673SSatish Balay r1 = baij->roworiented; 1007a2d1c673SSatish Balay r2 = a->roworiented; 1008a2d1c673SSatish Balay r3 = b->roworiented; 10097c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10107c922b88SBarry Smith a->roworiented = PETSC_FALSE; 10117c922b88SBarry Smith b->roworiented = PETSC_FALSE; 1012a2d1c673SSatish Balay while (1) { 10138798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014a2d1c673SSatish Balay if (!flg) break; 1015a2d1c673SSatish Balay 1016a2d1c673SSatish Balay for (i=0; i<n;) { 1017a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1018a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019a2d1c673SSatish Balay if (j < n) ncols = j-i; 1020a2d1c673SSatish Balay else ncols = n-i; 102193fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022a2d1c673SSatish Balay i = j; 1023a2d1c673SSatish Balay } 1024a2d1c673SSatish Balay } 10258798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026a2d1c673SSatish Balay baij->roworiented = r1; 1027a2d1c673SSatish Balay a->roworiented = r2; 1028a2d1c673SSatish Balay b->roworiented = r3; 1029bbb85fb3SSatish Balay } 1030bbb85fb3SSatish Balay 1031bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033bbb85fb3SSatish Balay 1034bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1035bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1036bbb85fb3SSatish Balay /* 1037bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1038bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1039bbb85fb3SSatish Balay */ 1040bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1043bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1044bbb85fb3SSatish Balay } 1045bbb85fb3SSatish Balay } 1046bbb85fb3SSatish Balay 1047bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1049bbb85fb3SSatish Balay } 105073e7a558SHong Zhang b->compressedrow.use = PETSC_TRUE; 1051bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1052bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1053bbb85fb3SSatish Balay 10542515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 1055bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 105663ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr); 1057bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1058bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1059bbb85fb3SSatish Balay } 1060bbb85fb3SSatish Balay #endif 1061bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1062bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1063bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1064bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1065bbb85fb3SSatish Balay } 1066bbb85fb3SSatish Balay 1067606d414cSSatish Balay if (baij->rowvalues) { 1068606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1069606d414cSSatish Balay baij->rowvalues = 0; 1070606d414cSSatish Balay } 1071bbb85fb3SSatish Balay PetscFunctionReturn(0); 1072bbb85fb3SSatish Balay } 107357b952d6SSatish Balay 10744a2ae208SSatish Balay #undef __FUNCT__ 10754a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 10766849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 107757b952d6SSatish Balay { 107857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1079dfbe8321SBarry Smith PetscErrorCode ierr; 1080b24ad042SBarry Smith PetscMPIInt size = baij->size,rank = baij->rank; 1081521d7252SBarry Smith PetscInt bs = mat->bs; 108232077d6dSBarry Smith PetscTruth iascii,isdraw; 1083b0a32e0cSBarry Smith PetscViewer sviewer; 1084f3ef73ceSBarry Smith PetscViewerFormat format; 108557b952d6SSatish Balay 1086d64ed03dSBarry Smith PetscFunctionBegin; 1087f81bd7ccSHong Zhang /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 108832077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1089fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 109032077d6dSBarry Smith if (iascii) { 1091b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1092456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 10934e220ebcSLois Curfman McInnes MatInfo info; 1094d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1095d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 109677431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 109777431f27SBarry Smith rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1098521d7252SBarry Smith mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1099d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 110077431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1101d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 110277431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1103b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 110457b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 11053a40ed3dSBarry Smith PetscFunctionReturn(0); 1106fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 110777431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 11083a40ed3dSBarry Smith PetscFunctionReturn(0); 110904929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 111004929863SHong Zhang PetscFunctionReturn(0); 111157b952d6SSatish Balay } 111257b952d6SSatish Balay } 111357b952d6SSatish Balay 11140f5bd95cSBarry Smith if (isdraw) { 1115b0a32e0cSBarry Smith PetscDraw draw; 111657b952d6SSatish Balay PetscTruth isnull; 1117b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1118b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 111957b952d6SSatish Balay } 112057b952d6SSatish Balay 112157b952d6SSatish Balay if (size == 1) { 1122873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 112357b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1124d64ed03dSBarry Smith } else { 112557b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 112657b952d6SSatish Balay Mat A; 112757b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1128b24ad042SBarry Smith PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11293eda8832SBarry Smith MatScalar *a; 113057b952d6SSatish Balay 1131f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1132f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 1133f69a0ea3SMatthew Knepley ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 113457b952d6SSatish Balay if (!rank) { 1135f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1136d64ed03dSBarry Smith } else { 1137f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 113857b952d6SSatish Balay } 1139f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1140521d7252SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 114152e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 114257b952d6SSatish Balay 114357b952d6SSatish Balay /* copy over the A part */ 114457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 114557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1146b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 114757b952d6SSatish Balay 114857b952d6SSatish Balay for (i=0; i<mbs; i++) { 114957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 115057b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 115157b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 115257b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 115357b952d6SSatish Balay for (k=0; k<bs; k++) { 115493fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1155cee3aa6bSSatish Balay col++; a += bs; 115657b952d6SSatish Balay } 115757b952d6SSatish Balay } 115857b952d6SSatish Balay } 115957b952d6SSatish Balay /* copy over the B part */ 116057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 116157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 116257b952d6SSatish Balay for (i=0; i<mbs; i++) { 116357b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 116457b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 116557b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 116657b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 116757b952d6SSatish Balay for (k=0; k<bs; k++) { 116893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1169cee3aa6bSSatish Balay col++; a += bs; 117057b952d6SSatish Balay } 117157b952d6SSatish Balay } 117257b952d6SSatish Balay } 1173606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11746d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11756d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 117655843e3eSBarry Smith /* 117755843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1178b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 117955843e3eSBarry Smith */ 1180b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1181f14a1c24SBarry Smith if (!rank) { 1182e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11836831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 118457b952d6SSatish Balay } 1185b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 118657b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 118757b952d6SSatish Balay } 11883a40ed3dSBarry Smith PetscFunctionReturn(0); 118957b952d6SSatish Balay } 119057b952d6SSatish Balay 11914a2ae208SSatish Balay #undef __FUNCT__ 11924a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1193dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 119457b952d6SSatish Balay { 1195dfbe8321SBarry Smith PetscErrorCode ierr; 119632077d6dSBarry Smith PetscTruth iascii,isdraw,issocket,isbinary; 119757b952d6SSatish Balay 1198d64ed03dSBarry Smith PetscFunctionBegin; 119932077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1200fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1201b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1202fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 120332077d6dSBarry Smith if (iascii || isdraw || issocket || isbinary) { 12047b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 12055cd90555SBarry Smith } else { 120679a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 120757b952d6SSatish Balay } 12083a40ed3dSBarry Smith PetscFunctionReturn(0); 120957b952d6SSatish Balay } 121057b952d6SSatish Balay 12114a2ae208SSatish Balay #undef __FUNCT__ 12124a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1213dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 121479bdfe76SSatish Balay { 121579bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1216dfbe8321SBarry Smith PetscErrorCode ierr; 121779bdfe76SSatish Balay 1218d64ed03dSBarry Smith PetscFunctionBegin; 1219aa482453SBarry Smith #if defined(PETSC_USE_LOG) 122077431f27SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 122179bdfe76SSatish Balay #endif 12228798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12238798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1224606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 122579bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 122679bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1227aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12280f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 122948e59246SSatish Balay #else 1230606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 123148e59246SSatish Balay #endif 1232606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1233606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1234606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1235606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1236606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1237606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 12386fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 12396fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 12406fa18ffdSBarry Smith #endif 1241606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1242901853e0SKris Buschelman 1243901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1244901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1245901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1246901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1247aac34f13SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1248901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1249901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 12503a40ed3dSBarry Smith PetscFunctionReturn(0); 125179bdfe76SSatish Balay } 125279bdfe76SSatish Balay 12534a2ae208SSatish Balay #undef __FUNCT__ 12544a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1255dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1256cee3aa6bSSatish Balay { 1257cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1258dfbe8321SBarry Smith PetscErrorCode ierr; 1259b24ad042SBarry Smith PetscInt nt; 1260cee3aa6bSSatish Balay 1261d64ed03dSBarry Smith PetscFunctionBegin; 1262e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1263273d9f13SBarry Smith if (nt != A->n) { 126429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 126547b4a8eaSLois Curfman McInnes } 1266e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1267273d9f13SBarry Smith if (nt != A->m) { 126829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 126947b4a8eaSLois Curfman McInnes } 127043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1271f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 127243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1273f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 127443a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12753a40ed3dSBarry Smith PetscFunctionReturn(0); 1276cee3aa6bSSatish Balay } 1277cee3aa6bSSatish Balay 12784a2ae208SSatish Balay #undef __FUNCT__ 12794a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1280dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1281cee3aa6bSSatish Balay { 1282cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1283dfbe8321SBarry Smith PetscErrorCode ierr; 1284d64ed03dSBarry Smith 1285d64ed03dSBarry Smith PetscFunctionBegin; 128643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1287f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 128843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1289f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12903a40ed3dSBarry Smith PetscFunctionReturn(0); 1291cee3aa6bSSatish Balay } 1292cee3aa6bSSatish Balay 12934a2ae208SSatish Balay #undef __FUNCT__ 12944a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1295dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1296cee3aa6bSSatish Balay { 1297cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1298dfbe8321SBarry Smith PetscErrorCode ierr; 1299a5ff213dSBarry Smith PetscTruth merged; 1300cee3aa6bSSatish Balay 1301d64ed03dSBarry Smith PetscFunctionBegin; 1302a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1303cee3aa6bSSatish Balay /* do nondiagonal part */ 13047c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1305a5ff213dSBarry Smith if (!merged) { 1306cee3aa6bSSatish Balay /* send it on its way */ 1307537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1308cee3aa6bSSatish Balay /* do local part */ 13097c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1310cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1311a5ff213dSBarry Smith /* inserted in yy until the next line */ 1312639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1313a5ff213dSBarry Smith } else { 1314a5ff213dSBarry Smith /* do local part */ 1315a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1316a5ff213dSBarry Smith /* send it on its way */ 1317a5ff213dSBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1318a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 1319a5ff213dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1320a5ff213dSBarry Smith } 13213a40ed3dSBarry Smith PetscFunctionReturn(0); 1322cee3aa6bSSatish Balay } 1323cee3aa6bSSatish Balay 13244a2ae208SSatish Balay #undef __FUNCT__ 13254a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1326dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1327cee3aa6bSSatish Balay { 1328cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329dfbe8321SBarry Smith PetscErrorCode ierr; 1330cee3aa6bSSatish Balay 1331d64ed03dSBarry Smith PetscFunctionBegin; 1332cee3aa6bSSatish Balay /* do nondiagonal part */ 13337c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1334cee3aa6bSSatish Balay /* send it on its way */ 1335537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1336cee3aa6bSSatish Balay /* do local part */ 13377c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1338cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1339cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1340cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1341537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13423a40ed3dSBarry Smith PetscFunctionReturn(0); 1343cee3aa6bSSatish Balay } 1344cee3aa6bSSatish Balay 1345cee3aa6bSSatish Balay /* 1346cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1347cee3aa6bSSatish Balay diagonal block 1348cee3aa6bSSatish Balay */ 13494a2ae208SSatish Balay #undef __FUNCT__ 13504a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1351dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1352cee3aa6bSSatish Balay { 1353cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1354dfbe8321SBarry Smith PetscErrorCode ierr; 1355d64ed03dSBarry Smith 1356d64ed03dSBarry Smith PetscFunctionBegin; 1357273d9f13SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13583a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13593a40ed3dSBarry Smith PetscFunctionReturn(0); 1360cee3aa6bSSatish Balay } 1361cee3aa6bSSatish Balay 13624a2ae208SSatish Balay #undef __FUNCT__ 13634a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1364f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1365cee3aa6bSSatish Balay { 1366cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1367dfbe8321SBarry Smith PetscErrorCode ierr; 1368d64ed03dSBarry Smith 1369d64ed03dSBarry Smith PetscFunctionBegin; 1370f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1371f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 13723a40ed3dSBarry Smith PetscFunctionReturn(0); 1373cee3aa6bSSatish Balay } 1374026e39d0SSatish Balay 13754a2ae208SSatish Balay #undef __FUNCT__ 13764a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1377b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1378acdf5bf4SSatish Balay { 1379acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 138087828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13816849ba73SBarry Smith PetscErrorCode ierr; 1382521d7252SBarry Smith PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1383b24ad042SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1384b24ad042SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstart; 1385acdf5bf4SSatish Balay 1386d64ed03dSBarry Smith PetscFunctionBegin; 1387abc0a331SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1388acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1389acdf5bf4SSatish Balay 1390acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1391acdf5bf4SSatish Balay /* 1392acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1393acdf5bf4SSatish Balay */ 1394acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1395b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1396bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1397acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1398acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1399acdf5bf4SSatish Balay } 1400b24ad042SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1401b24ad042SBarry Smith mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1402acdf5bf4SSatish Balay } 1403acdf5bf4SSatish Balay 140429bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1405d9d09a02SSatish Balay lrow = row - brstart; 1406acdf5bf4SSatish Balay 1407acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1408acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1409acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1410f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1411f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1412acdf5bf4SSatish Balay nztot = nzA + nzB; 1413acdf5bf4SSatish Balay 1414acdf5bf4SSatish Balay cmap = mat->garray; 1415acdf5bf4SSatish Balay if (v || idx) { 1416acdf5bf4SSatish Balay if (nztot) { 1417acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1418b24ad042SBarry Smith PetscInt imark = -1; 1419acdf5bf4SSatish Balay if (v) { 1420acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1421acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1422d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1423acdf5bf4SSatish Balay else break; 1424acdf5bf4SSatish Balay } 1425acdf5bf4SSatish Balay imark = i; 1426acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1427acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1428acdf5bf4SSatish Balay } 1429acdf5bf4SSatish Balay if (idx) { 1430acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1431acdf5bf4SSatish Balay if (imark > -1) { 1432acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1433bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1434acdf5bf4SSatish Balay } 1435acdf5bf4SSatish Balay } else { 1436acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1437d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1438d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1439acdf5bf4SSatish Balay else break; 1440acdf5bf4SSatish Balay } 1441acdf5bf4SSatish Balay imark = i; 1442acdf5bf4SSatish Balay } 1443d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1444d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1445acdf5bf4SSatish Balay } 1446d64ed03dSBarry Smith } else { 1447d212a18eSSatish Balay if (idx) *idx = 0; 1448d212a18eSSatish Balay if (v) *v = 0; 1449d212a18eSSatish Balay } 1450acdf5bf4SSatish Balay } 1451acdf5bf4SSatish Balay *nz = nztot; 1452f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1453f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14543a40ed3dSBarry Smith PetscFunctionReturn(0); 1455acdf5bf4SSatish Balay } 1456acdf5bf4SSatish Balay 14574a2ae208SSatish Balay #undef __FUNCT__ 14584a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1459b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1460acdf5bf4SSatish Balay { 1461acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1462d64ed03dSBarry Smith 1463d64ed03dSBarry Smith PetscFunctionBegin; 1464abc0a331SBarry Smith if (!baij->getrowactive) { 146529bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1466acdf5bf4SSatish Balay } 1467acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14683a40ed3dSBarry Smith PetscFunctionReturn(0); 1469acdf5bf4SSatish Balay } 1470acdf5bf4SSatish Balay 14714a2ae208SSatish Balay #undef __FUNCT__ 14724a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1473dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 147458667388SSatish Balay { 147558667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1476dfbe8321SBarry Smith PetscErrorCode ierr; 1477d64ed03dSBarry Smith 1478d64ed03dSBarry Smith PetscFunctionBegin; 147958667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 148058667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14813a40ed3dSBarry Smith PetscFunctionReturn(0); 148258667388SSatish Balay } 14830ac07820SSatish Balay 14844a2ae208SSatish Balay #undef __FUNCT__ 14854a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1486dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14870ac07820SSatish Balay { 14884e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14894e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1490dfbe8321SBarry Smith PetscErrorCode ierr; 1491329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14920ac07820SSatish Balay 1493d64ed03dSBarry Smith PetscFunctionBegin; 1494521d7252SBarry Smith info->block_size = (PetscReal)matin->bs; 14954e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14960e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1497de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 14984e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 14990e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1500de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 15010ac07820SSatish Balay if (flag == MAT_LOCAL) { 15024e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15034e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 15044e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15054e220ebcSLois Curfman McInnes info->memory = isend[3]; 15064e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15070ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1508d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 15094e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15104e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15114e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15124e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15134e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15140ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1515d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 15164e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15174e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15184e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15194e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15204e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1521d41123aaSBarry Smith } else { 152277431f27SBarry Smith SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 15230ac07820SSatish Balay } 1524f6275e2eSBarry Smith info->rows_global = (PetscReal)A->M; 1525f6275e2eSBarry Smith info->columns_global = (PetscReal)A->N; 1526f6275e2eSBarry Smith info->rows_local = (PetscReal)A->m; 1527f6275e2eSBarry Smith info->columns_local = (PetscReal)A->N; 15284e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15294e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15304e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15313a40ed3dSBarry Smith PetscFunctionReturn(0); 15320ac07820SSatish Balay } 15330ac07820SSatish Balay 15344a2ae208SSatish Balay #undef __FUNCT__ 15354a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1536dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 153758667388SSatish Balay { 153858667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1539dfbe8321SBarry Smith PetscErrorCode ierr; 154058667388SSatish Balay 1541d64ed03dSBarry Smith PetscFunctionBegin; 154212c028f9SKris Buschelman switch (op) { 154312c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 154412c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 154512c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 154612c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 154712c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 154812c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 154912c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 155098305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155198305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155212c028f9SKris Buschelman break; 155312c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15547c922b88SBarry Smith a->roworiented = PETSC_TRUE; 155598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 155698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 155712c028f9SKris Buschelman break; 155812c028f9SKris Buschelman case MAT_ROWS_SORTED: 155912c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 156012c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 156163ba0a88SBarry Smith ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 156212c028f9SKris Buschelman break; 156312c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15647c922b88SBarry Smith a->roworiented = PETSC_FALSE; 156598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 156698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 156712c028f9SKris Buschelman break; 156812c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15697c922b88SBarry Smith a->donotstash = PETSC_TRUE; 157012c028f9SKris Buschelman break; 157112c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 157229bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 157312c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15747c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 157512c028f9SKris Buschelman break; 157677e54ba9SKris Buschelman case MAT_SYMMETRIC: 157777e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15782188ac68SBarry Smith case MAT_HERMITIAN: 15792188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 15802188ac68SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 15812188ac68SBarry Smith break; 15829a4540c5SBarry Smith case MAT_NOT_SYMMETRIC: 15839a4540c5SBarry Smith case MAT_NOT_STRUCTURALLY_SYMMETRIC: 15849a4540c5SBarry Smith case MAT_NOT_HERMITIAN: 15859a4540c5SBarry Smith case MAT_NOT_SYMMETRY_ETERNAL: 158677e54ba9SKris Buschelman break; 158712c028f9SKris Buschelman default: 158829bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 1589d64ed03dSBarry Smith } 15903a40ed3dSBarry Smith PetscFunctionReturn(0); 159158667388SSatish Balay } 159258667388SSatish Balay 15934a2ae208SSatish Balay #undef __FUNCT__ 15944a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1595dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15960ac07820SSatish Balay { 15970ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15980ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15990ac07820SSatish Balay Mat B; 1600dfbe8321SBarry Smith PetscErrorCode ierr; 1601b24ad042SBarry Smith PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1602521d7252SBarry Smith PetscInt bs=A->bs,mbs=baij->mbs; 16033eda8832SBarry Smith MatScalar *a; 16040ac07820SSatish Balay 1605d64ed03dSBarry Smith PetscFunctionBegin; 160629bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1607f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1608f69a0ea3SMatthew Knepley ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr); 1609f204ca49SKris Buschelman ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1610521d7252SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 16110ac07820SSatish Balay 16120ac07820SSatish Balay /* copy over the A part */ 16130ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16140ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1615b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 16160ac07820SSatish Balay 16170ac07820SSatish Balay for (i=0; i<mbs; i++) { 16180ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16190ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16200ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16210ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 16220ac07820SSatish Balay for (k=0; k<bs; k++) { 162393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16240ac07820SSatish Balay col++; a += bs; 16250ac07820SSatish Balay } 16260ac07820SSatish Balay } 16270ac07820SSatish Balay } 16280ac07820SSatish Balay /* copy over the B part */ 16290ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16300ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16310ac07820SSatish Balay for (i=0; i<mbs; i++) { 16320ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16330ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16340ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16350ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16360ac07820SSatish Balay for (k=0; k<bs; k++) { 163793fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16380ac07820SSatish Balay col++; a += bs; 16390ac07820SSatish Balay } 16400ac07820SSatish Balay } 16410ac07820SSatish Balay } 1642606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16430ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16440ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16450ac07820SSatish Balay 16467c922b88SBarry Smith if (matout) { 16470ac07820SSatish Balay *matout = B; 16480ac07820SSatish Balay } else { 1649273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16500ac07820SSatish Balay } 16513a40ed3dSBarry Smith PetscFunctionReturn(0); 16520ac07820SSatish Balay } 16530e95ebc0SSatish Balay 16544a2ae208SSatish Balay #undef __FUNCT__ 16554a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1656dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16570e95ebc0SSatish Balay { 165836c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 165936c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1660dfbe8321SBarry Smith PetscErrorCode ierr; 1661b24ad042SBarry Smith PetscInt s1,s2,s3; 16620e95ebc0SSatish Balay 1663d64ed03dSBarry Smith PetscFunctionBegin; 166436c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 166536c4a09eSSatish Balay if (rr) { 166636c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 166729bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 166836c4a09eSSatish Balay /* Overlap communication with computation. */ 166936c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 167036c4a09eSSatish Balay } 16710e95ebc0SSatish Balay if (ll) { 16720e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 167329bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1674a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16750e95ebc0SSatish Balay } 167636c4a09eSSatish Balay /* scale the diagonal block */ 167736c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 167836c4a09eSSatish Balay 167936c4a09eSSatish Balay if (rr) { 168036c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 168136c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1682a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 168336c4a09eSSatish Balay } 168436c4a09eSSatish Balay 16853a40ed3dSBarry Smith PetscFunctionReturn(0); 16860e95ebc0SSatish Balay } 16870e95ebc0SSatish Balay 16884a2ae208SSatish Balay #undef __FUNCT__ 16894a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1690f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 16910ac07820SSatish Balay { 16920ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16936849ba73SBarry Smith PetscErrorCode ierr; 1694b24ad042SBarry Smith PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1695f4df32b1SMatthew Knepley PetscInt i,*owners = l->rowners; 1696b24ad042SBarry Smith PetscInt *nprocs,j,idx,nsends,row; 1697b24ad042SBarry Smith PetscInt nmax,*svalues,*starts,*owner,nrecvs; 16986543fbbaSBarry Smith PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1699521d7252SBarry Smith PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 17000ac07820SSatish Balay MPI_Comm comm = A->comm; 17010ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17020ac07820SSatish Balay MPI_Status recv_status,*send_status; 17036543fbbaSBarry Smith #if defined(PETSC_DEBUG) 17046543fbbaSBarry Smith PetscTruth found = PETSC_FALSE; 17056543fbbaSBarry Smith #endif 17060ac07820SSatish Balay 1707d64ed03dSBarry Smith PetscFunctionBegin; 17080ac07820SSatish Balay /* first count number of contributors to each processor */ 1709b24ad042SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1710b24ad042SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1711b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 17126543fbbaSBarry Smith j = 0; 17130ac07820SSatish Balay for (i=0; i<N; i++) { 17146543fbbaSBarry Smith if (lastidx > (idx = rows[i])) j = 0; 17156543fbbaSBarry Smith lastidx = idx; 17166543fbbaSBarry Smith for (; j<size; j++) { 17170ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 17186543fbbaSBarry Smith nprocs[2*j]++; 17196543fbbaSBarry Smith nprocs[2*j+1] = 1; 17206543fbbaSBarry Smith owner[i] = j; 17216543fbbaSBarry Smith #if defined(PETSC_DEBUG) 17226543fbbaSBarry Smith found = PETSC_TRUE; 17236543fbbaSBarry Smith #endif 17246543fbbaSBarry Smith break; 17250ac07820SSatish Balay } 17260ac07820SSatish Balay } 17276543fbbaSBarry Smith #if defined(PETSC_DEBUG) 172829bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 17296543fbbaSBarry Smith found = PETSC_FALSE; 17306543fbbaSBarry Smith #endif 17310ac07820SSatish Balay } 1732c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 17330ac07820SSatish Balay 17340ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1735c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 17360ac07820SSatish Balay 17370ac07820SSatish Balay /* post receives: */ 1738b24ad042SBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1739b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17400ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1741b24ad042SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17420ac07820SSatish Balay } 17430ac07820SSatish Balay 17440ac07820SSatish Balay /* do sends: 17450ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17460ac07820SSatish Balay the ith processor 17470ac07820SSatish Balay */ 1748b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1749b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1750b24ad042SBarry Smith ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 17510ac07820SSatish Balay starts[0] = 0; 1752c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17530ac07820SSatish Balay for (i=0; i<N; i++) { 17540ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17550ac07820SSatish Balay } 17560ac07820SSatish Balay 17570ac07820SSatish Balay starts[0] = 0; 1758c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17590ac07820SSatish Balay count = 0; 17600ac07820SSatish Balay for (i=0; i<size; i++) { 1761c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1762b24ad042SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17630ac07820SSatish Balay } 17640ac07820SSatish Balay } 1765606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17660ac07820SSatish Balay 17670ac07820SSatish Balay base = owners[rank]*bs; 17680ac07820SSatish Balay 17690ac07820SSatish Balay /* wait on receives */ 1770b24ad042SBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 17710ac07820SSatish Balay source = lens + nrecvs; 17720ac07820SSatish Balay count = nrecvs; slen = 0; 17730ac07820SSatish Balay while (count) { 1774ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17750ac07820SSatish Balay /* unpack receives into our local space */ 1776b24ad042SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 17770ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17780ac07820SSatish Balay lens[imdex] = n; 17790ac07820SSatish Balay slen += n; 17800ac07820SSatish Balay count--; 17810ac07820SSatish Balay } 1782606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17830ac07820SSatish Balay 17840ac07820SSatish Balay /* move the data into the send scatter */ 1785b24ad042SBarry Smith ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 17860ac07820SSatish Balay count = 0; 17870ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17880ac07820SSatish Balay values = rvalues + i*nmax; 17890ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17900ac07820SSatish Balay lrows[count++] = values[j] - base; 17910ac07820SSatish Balay } 17920ac07820SSatish Balay } 1793606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1794606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1795606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1796606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17970ac07820SSatish Balay 17980ac07820SSatish Balay /* actually zap the local rows */ 179972dacd9aSBarry Smith /* 180072dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 180172dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 180272dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 180372dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 180472dacd9aSBarry Smith 1805f4df32b1SMatthew Knepley Contributed by: Matthew Knepley 180672dacd9aSBarry Smith */ 18079c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1808f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1809f4df32b1SMatthew Knepley if ((diag != 0.0) && (l->A->M == l->A->N)) { 1810f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1811f4df32b1SMatthew Knepley } else if (diag != 0.0) { 1812f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1813fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 181429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1815fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18166525c446SSatish Balay } 1817a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1818a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1819f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1820a07cd24cSSatish Balay } 1821a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1822a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18239c957beeSSatish Balay } else { 1824f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1825a07cd24cSSatish Balay } 18269c957beeSSatish Balay 1827606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1828a07cd24cSSatish Balay 18290ac07820SSatish Balay /* wait on sends */ 18300ac07820SSatish Balay if (nsends) { 183182502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1832ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1833606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18340ac07820SSatish Balay } 1835606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1836606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18370ac07820SSatish Balay 18383a40ed3dSBarry Smith PetscFunctionReturn(0); 18390ac07820SSatish Balay } 184072dacd9aSBarry Smith 18414a2ae208SSatish Balay #undef __FUNCT__ 18424a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1843dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1844ba4ca20aSSatish Balay { 1845ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 184625fdafccSSatish Balay MPI_Comm comm = A->comm; 1847b24ad042SBarry Smith static PetscTruth called = PETSC_FALSE; 1848dfbe8321SBarry Smith PetscErrorCode ierr; 1849ba4ca20aSSatish Balay 1850d64ed03dSBarry Smith PetscFunctionBegin; 18513a40ed3dSBarry Smith if (!a->rank) { 18523a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 185325fdafccSSatish Balay } 1854b24ad042SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1855d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1856d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18573a40ed3dSBarry Smith PetscFunctionReturn(0); 1858ba4ca20aSSatish Balay } 18590ac07820SSatish Balay 18604a2ae208SSatish Balay #undef __FUNCT__ 18614a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1862dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1863bb5a7306SBarry Smith { 1864bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1865dfbe8321SBarry Smith PetscErrorCode ierr; 1866d64ed03dSBarry Smith 1867d64ed03dSBarry Smith PetscFunctionBegin; 1868bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18693a40ed3dSBarry Smith PetscFunctionReturn(0); 1870bb5a7306SBarry Smith } 1871bb5a7306SBarry Smith 18726849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18730ac07820SSatish Balay 18744a2ae208SSatish Balay #undef __FUNCT__ 18754a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1876dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18777fc3c18eSBarry Smith { 18787fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18797fc3c18eSBarry Smith Mat a,b,c,d; 18807fc3c18eSBarry Smith PetscTruth flg; 1881dfbe8321SBarry Smith PetscErrorCode ierr; 18827fc3c18eSBarry Smith 18837fc3c18eSBarry Smith PetscFunctionBegin; 18847fc3c18eSBarry Smith a = matA->A; b = matA->B; 18857fc3c18eSBarry Smith c = matB->A; d = matB->B; 18867fc3c18eSBarry Smith 18877fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1888abc0a331SBarry Smith if (flg) { 18897fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18907fc3c18eSBarry Smith } 18917fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18927fc3c18eSBarry Smith PetscFunctionReturn(0); 18937fc3c18eSBarry Smith } 18947fc3c18eSBarry Smith 1895273d9f13SBarry Smith 18964a2ae208SSatish Balay #undef __FUNCT__ 18974a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1898dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1899273d9f13SBarry Smith { 1900dfbe8321SBarry Smith PetscErrorCode ierr; 1901273d9f13SBarry Smith 1902273d9f13SBarry Smith PetscFunctionBegin; 1903273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1904273d9f13SBarry Smith PetscFunctionReturn(0); 1905273d9f13SBarry Smith } 1906273d9f13SBarry Smith 190779bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1908cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1909cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1910cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1911cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1912cc2dc46cSBarry Smith MatMult_MPIBAIJ, 191397304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 19147c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19157c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1916cc2dc46cSBarry Smith 0, 1917cc2dc46cSBarry Smith 0, 1918cc2dc46cSBarry Smith 0, 191997304618SKris Buschelman /*10*/ 0, 1920cc2dc46cSBarry Smith 0, 1921cc2dc46cSBarry Smith 0, 1922cc2dc46cSBarry Smith 0, 1923cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 192497304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 19257fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1926cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1927cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1928cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 192997304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 1930cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1931cc2dc46cSBarry Smith 0, 1932cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1933cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 193497304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ, 1935cc2dc46cSBarry Smith 0, 1936cc2dc46cSBarry Smith 0, 1937cc2dc46cSBarry Smith 0, 1938cc2dc46cSBarry Smith 0, 193997304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ, 1940273d9f13SBarry Smith 0, 1941cc2dc46cSBarry Smith 0, 1942cc2dc46cSBarry Smith 0, 1943cc2dc46cSBarry Smith 0, 194497304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ, 1945cc2dc46cSBarry Smith 0, 1946cc2dc46cSBarry Smith 0, 1947cc2dc46cSBarry Smith 0, 1948cc2dc46cSBarry Smith 0, 194997304618SKris Buschelman /*40*/ 0, 1950cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1951cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1952cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1953cc2dc46cSBarry Smith 0, 195497304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ, 1955cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1956cc2dc46cSBarry Smith 0, 1957cc2dc46cSBarry Smith 0, 1958cc2dc46cSBarry Smith 0, 1959521d7252SBarry Smith /*50*/ 0, 1960cc2dc46cSBarry Smith 0, 1961cc2dc46cSBarry Smith 0, 1962cc2dc46cSBarry Smith 0, 1963cc2dc46cSBarry Smith 0, 196497304618SKris Buschelman /*55*/ 0, 1965cc2dc46cSBarry Smith 0, 1966cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1967cc2dc46cSBarry Smith 0, 1968cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 196997304618SKris Buschelman /*60*/ 0, 1970f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 1971f14a1c24SBarry Smith MatView_MPIBAIJ, 19728a124369SBarry Smith MatGetPetscMaps_Petsc, 19737843d17aSBarry Smith 0, 197497304618SKris Buschelman /*65*/ 0, 19757843d17aSBarry Smith 0, 19767843d17aSBarry Smith 0, 19777843d17aSBarry Smith 0, 19787843d17aSBarry Smith 0, 197997304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ, 19807843d17aSBarry Smith 0, 198197304618SKris Buschelman 0, 198297304618SKris Buschelman 0, 198397304618SKris Buschelman 0, 198497304618SKris Buschelman /*75*/ 0, 198597304618SKris Buschelman 0, 198697304618SKris Buschelman 0, 198797304618SKris Buschelman 0, 198897304618SKris Buschelman 0, 198997304618SKris Buschelman /*80*/ 0, 199097304618SKris Buschelman 0, 199197304618SKris Buschelman 0, 199297304618SKris Buschelman 0, 1993865e5f61SKris Buschelman MatLoad_MPIBAIJ, 1994865e5f61SKris Buschelman /*85*/ 0, 1995865e5f61SKris Buschelman 0, 1996865e5f61SKris Buschelman 0, 1997865e5f61SKris Buschelman 0, 1998865e5f61SKris Buschelman 0, 1999865e5f61SKris Buschelman /*90*/ 0, 2000865e5f61SKris Buschelman 0, 2001865e5f61SKris Buschelman 0, 2002865e5f61SKris Buschelman 0, 2003865e5f61SKris Buschelman 0, 2004865e5f61SKris Buschelman /*95*/ 0, 2005865e5f61SKris Buschelman 0, 2006865e5f61SKris Buschelman 0, 2007865e5f61SKris Buschelman 0}; 200879bdfe76SSatish Balay 20095ef9f2a5SBarry Smith 2010e18c124aSSatish Balay EXTERN_C_BEGIN 20114a2ae208SSatish Balay #undef __FUNCT__ 20124a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2013be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 20145ef9f2a5SBarry Smith { 20155ef9f2a5SBarry Smith PetscFunctionBegin; 20165ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 20175ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 20185ef9f2a5SBarry Smith PetscFunctionReturn(0); 20195ef9f2a5SBarry Smith } 2020e18c124aSSatish Balay EXTERN_C_END 202179bdfe76SSatish Balay 2022273d9f13SBarry Smith EXTERN_C_BEGIN 2023f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2024d94109b8SHong Zhang EXTERN_C_END 2025d94109b8SHong Zhang 2026aac34f13SBarry Smith #undef __FUNCT__ 2027aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2028aac34f13SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2029aac34f13SBarry Smith { 2030aac34f13SBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2031aac34f13SBarry Smith PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2032aac34f13SBarry Smith PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2033aac34f13SBarry Smith const PetscInt *JJ; 2034aac34f13SBarry Smith PetscScalar *values; 2035aac34f13SBarry Smith PetscErrorCode ierr; 2036aac34f13SBarry Smith 2037aac34f13SBarry Smith PetscFunctionBegin; 2038aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2039aac34f13SBarry Smith if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2040aac34f13SBarry Smith #endif 2041aac34f13SBarry Smith ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2042aac34f13SBarry Smith o_nnz = d_nnz + m; 2043aac34f13SBarry Smith 2044aac34f13SBarry Smith for (i=0; i<m; i++) { 2045aac34f13SBarry Smith nnz = I[i+1]- I[i]; 2046aac34f13SBarry Smith JJ = J + I[i]; 2047aac34f13SBarry Smith nnz_max = PetscMax(nnz_max,nnz); 2048aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2049aac34f13SBarry Smith if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2050aac34f13SBarry Smith #endif 2051aac34f13SBarry Smith for (j=0; j<nnz; j++) { 2052aac34f13SBarry Smith if (*JJ >= cstart) break; 2053aac34f13SBarry Smith JJ++; 2054aac34f13SBarry Smith } 2055aac34f13SBarry Smith d = 0; 2056aac34f13SBarry Smith for (; j<nnz; j++) { 2057aac34f13SBarry Smith if (*JJ++ >= cend) break; 2058aac34f13SBarry Smith d++; 2059aac34f13SBarry Smith } 2060aac34f13SBarry Smith d_nnz[i] = d; 2061aac34f13SBarry Smith o_nnz[i] = nnz - d; 2062aac34f13SBarry Smith } 2063aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2064aac34f13SBarry Smith ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2065aac34f13SBarry Smith 2066aac34f13SBarry Smith if (v) values = (PetscScalar*)v; 2067aac34f13SBarry Smith else { 2068aac34f13SBarry Smith ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2069aac34f13SBarry Smith ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2070aac34f13SBarry Smith } 2071aac34f13SBarry Smith 2072aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2073aac34f13SBarry Smith for (i=0; i<m; i++) { 2074aac34f13SBarry Smith ii = i + rstart; 2075aac34f13SBarry Smith nnz = I[i+1]- I[i]; 2076*3f01e284SBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2077aac34f13SBarry Smith } 2078aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2081aac34f13SBarry Smith 2082aac34f13SBarry Smith if (!v) { 2083aac34f13SBarry Smith ierr = PetscFree(values);CHKERRQ(ierr); 2084aac34f13SBarry Smith } 2085aac34f13SBarry Smith PetscFunctionReturn(0); 2086aac34f13SBarry Smith } 2087aac34f13SBarry Smith 2088aac34f13SBarry Smith #undef __FUNCT__ 2089aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2090aac34f13SBarry Smith /*@C 2091aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2092aac34f13SBarry Smith (the default parallel PETSc format). 2093aac34f13SBarry Smith 2094aac34f13SBarry Smith Collective on MPI_Comm 2095aac34f13SBarry Smith 2096aac34f13SBarry Smith Input Parameters: 2097aac34f13SBarry Smith + A - the matrix 2098aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2099aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2100aac34f13SBarry Smith - v - optional values in the matrix 2101aac34f13SBarry Smith 2102aac34f13SBarry Smith Level: developer 2103aac34f13SBarry Smith 2104aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 2105aac34f13SBarry Smith 2106aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2107aac34f13SBarry Smith @*/ 2108be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2109aac34f13SBarry Smith { 2110aac34f13SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2111aac34f13SBarry Smith 2112aac34f13SBarry Smith PetscFunctionBegin; 2113aac34f13SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2114aac34f13SBarry Smith if (f) { 2115aac34f13SBarry Smith ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2116aac34f13SBarry Smith } 2117aac34f13SBarry Smith PetscFunctionReturn(0); 2118aac34f13SBarry Smith } 2119aac34f13SBarry Smith 2120d94109b8SHong Zhang EXTERN_C_BEGIN 21214a2ae208SSatish Balay #undef __FUNCT__ 2122a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2123be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2124a23d5eceSKris Buschelman { 2125a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2126dfbe8321SBarry Smith PetscErrorCode ierr; 2127b24ad042SBarry Smith PetscInt i; 2128a23d5eceSKris Buschelman 2129a23d5eceSKris Buschelman PetscFunctionBegin; 2130a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 2131a23d5eceSKris Buschelman ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2132a23d5eceSKris Buschelman 2133a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2134a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2135a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 213677431f27SBarry Smith if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 213777431f27SBarry Smith if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2138a23d5eceSKris Buschelman if (d_nnz) { 2139a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 214077431f27SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2141a23d5eceSKris Buschelman } 2142a23d5eceSKris Buschelman } 2143a23d5eceSKris Buschelman if (o_nnz) { 2144a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 214577431f27SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2146a23d5eceSKris Buschelman } 2147a23d5eceSKris Buschelman } 2148a23d5eceSKris Buschelman 2149a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2150a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2151a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2152a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2153a23d5eceSKris Buschelman 2154a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2155521d7252SBarry Smith B->bs = bs; 2156a23d5eceSKris Buschelman b->bs2 = bs*bs; 2157a23d5eceSKris Buschelman b->mbs = B->m/bs; 2158a23d5eceSKris Buschelman b->nbs = B->n/bs; 2159a23d5eceSKris Buschelman b->Mbs = B->M/bs; 2160a23d5eceSKris Buschelman b->Nbs = B->N/bs; 2161a23d5eceSKris Buschelman 2162b24ad042SBarry Smith ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2163a23d5eceSKris Buschelman b->rowners[0] = 0; 2164a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2165a23d5eceSKris Buschelman b->rowners[i] += b->rowners[i-1]; 2166a23d5eceSKris Buschelman } 2167a23d5eceSKris Buschelman b->rstart = b->rowners[b->rank]; 2168a23d5eceSKris Buschelman b->rend = b->rowners[b->rank+1]; 2169a23d5eceSKris Buschelman 2170b24ad042SBarry Smith ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2171a23d5eceSKris Buschelman b->cowners[0] = 0; 2172a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2173a23d5eceSKris Buschelman b->cowners[i] += b->cowners[i-1]; 2174a23d5eceSKris Buschelman } 2175a23d5eceSKris Buschelman b->cstart = b->cowners[b->rank]; 2176a23d5eceSKris Buschelman b->cend = b->cowners[b->rank+1]; 2177a23d5eceSKris Buschelman 2178a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2179a23d5eceSKris Buschelman b->rowners_bs[i] = b->rowners[i]*bs; 2180a23d5eceSKris Buschelman } 2181a23d5eceSKris Buschelman b->rstart_bs = b->rstart*bs; 2182a23d5eceSKris Buschelman b->rend_bs = b->rend*bs; 2183a23d5eceSKris Buschelman b->cstart_bs = b->cstart*bs; 2184a23d5eceSKris Buschelman b->cend_bs = b->cend*bs; 2185a23d5eceSKris Buschelman 2186f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2187f69a0ea3SMatthew Knepley ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 21889c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2189c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 219052e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2191f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2192f69a0ea3SMatthew Knepley ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 21939c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2194c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 219552e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2196c60e587dSKris Buschelman 2197a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2198a23d5eceSKris Buschelman 2199a23d5eceSKris Buschelman PetscFunctionReturn(0); 2200a23d5eceSKris Buschelman } 2201a23d5eceSKris Buschelman EXTERN_C_END 2202a23d5eceSKris Buschelman 2203a23d5eceSKris Buschelman EXTERN_C_BEGIN 2204be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2205be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 220692b32695SKris Buschelman EXTERN_C_END 22075bf65638SKris Buschelman 22080bad9183SKris Buschelman /*MC 2209fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 22100bad9183SKris Buschelman 22110bad9183SKris Buschelman Options Database Keys: 22120bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 22130bad9183SKris Buschelman 22140bad9183SKris Buschelman Level: beginner 22150bad9183SKris Buschelman 22160bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 22170bad9183SKris Buschelman M*/ 22180bad9183SKris Buschelman 221992b32695SKris Buschelman EXTERN_C_BEGIN 2220a23d5eceSKris Buschelman #undef __FUNCT__ 22214a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2222be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2223273d9f13SBarry Smith { 2224273d9f13SBarry Smith Mat_MPIBAIJ *b; 2225dfbe8321SBarry Smith PetscErrorCode ierr; 2226273d9f13SBarry Smith PetscTruth flg; 2227273d9f13SBarry Smith 2228273d9f13SBarry Smith PetscFunctionBegin; 222982502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 223082502324SSatish Balay B->data = (void*)b; 223182502324SSatish Balay 2232085a36d4SBarry Smith 2233273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2234273d9f13SBarry Smith B->mapping = 0; 2235273d9f13SBarry Smith B->factor = 0; 2236273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2237273d9f13SBarry Smith 2238273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2239273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2240273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2241273d9f13SBarry Smith 2242273d9f13SBarry Smith /* build local table of row and column ownerships */ 2243b24ad042SBarry Smith ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 224452e6d16bSBarry Smith ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2245273d9f13SBarry Smith b->cowners = b->rowners + b->size + 2; 2246273d9f13SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2247273d9f13SBarry Smith 2248273d9f13SBarry Smith /* build cache for off array entries formed */ 2249273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2250273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2251273d9f13SBarry Smith b->colmap = PETSC_NULL; 2252273d9f13SBarry Smith b->garray = PETSC_NULL; 2253273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2254273d9f13SBarry Smith 2255cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2256273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 225764a35ccbSBarry Smith b->setvalueslen = 0; 2258273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2259273d9f13SBarry Smith #endif 2260273d9f13SBarry Smith 2261273d9f13SBarry Smith /* stuff used in block assembly */ 2262273d9f13SBarry Smith b->barray = 0; 2263273d9f13SBarry Smith 2264273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2265273d9f13SBarry Smith b->lvec = 0; 2266273d9f13SBarry Smith b->Mvctx = 0; 2267273d9f13SBarry Smith 2268273d9f13SBarry Smith /* stuff for MatGetRow() */ 2269273d9f13SBarry Smith b->rowindices = 0; 2270273d9f13SBarry Smith b->rowvalues = 0; 2271273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2272273d9f13SBarry Smith 2273273d9f13SBarry Smith /* hash table stuff */ 2274273d9f13SBarry Smith b->ht = 0; 2275273d9f13SBarry Smith b->hd = 0; 2276273d9f13SBarry Smith b->ht_size = 0; 2277273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2278273d9f13SBarry Smith b->ht_fact = 0; 2279273d9f13SBarry Smith b->ht_total_ct = 0; 2280273d9f13SBarry Smith b->ht_insert_ct = 0; 2281273d9f13SBarry Smith 2282b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2283273d9f13SBarry Smith if (flg) { 2284f6275e2eSBarry Smith PetscReal fact = 1.39; 2285273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 228687828ca2SBarry Smith ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2287273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2288273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 228963ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 2290273d9f13SBarry Smith } 2291273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2292273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2293273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2294273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2295273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2296273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2297273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2298273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2299273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2300a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2301a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2302a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2303aac34f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2304aac34f13SBarry Smith "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2305aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 230692b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 230792b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 230892b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 23095bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 23105bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 23115bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2312273d9f13SBarry Smith PetscFunctionReturn(0); 2313273d9f13SBarry Smith } 2314273d9f13SBarry Smith EXTERN_C_END 2315273d9f13SBarry Smith 2316209238afSKris Buschelman /*MC 2317002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2318209238afSKris Buschelman 2319209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2320209238afSKris Buschelman and MATMPIBAIJ otherwise. 2321209238afSKris Buschelman 2322209238afSKris Buschelman Options Database Keys: 2323209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2324209238afSKris Buschelman 2325209238afSKris Buschelman Level: beginner 2326209238afSKris Buschelman 2327aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2328209238afSKris Buschelman M*/ 2329209238afSKris Buschelman 2330209238afSKris Buschelman EXTERN_C_BEGIN 2331209238afSKris Buschelman #undef __FUNCT__ 2332209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ" 2333be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2334dfbe8321SBarry Smith { 23356849ba73SBarry Smith PetscErrorCode ierr; 2336b24ad042SBarry Smith PetscMPIInt size; 2337209238afSKris Buschelman 2338209238afSKris Buschelman PetscFunctionBegin; 2339209238afSKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2340209238afSKris Buschelman ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2341209238afSKris Buschelman if (size == 1) { 2342209238afSKris Buschelman ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2343209238afSKris Buschelman } else { 2344209238afSKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2345209238afSKris Buschelman } 2346209238afSKris Buschelman PetscFunctionReturn(0); 2347209238afSKris Buschelman } 2348209238afSKris Buschelman EXTERN_C_END 2349209238afSKris Buschelman 23504a2ae208SSatish Balay #undef __FUNCT__ 23514a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2352273d9f13SBarry Smith /*@C 2353aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2354273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2355273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2356273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2357273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2358273d9f13SBarry Smith 2359273d9f13SBarry Smith Collective on Mat 2360273d9f13SBarry Smith 2361273d9f13SBarry Smith Input Parameters: 2362273d9f13SBarry Smith + A - the matrix 2363273d9f13SBarry Smith . bs - size of blockk 2364273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2365273d9f13SBarry Smith submatrix (same for all local rows) 2366273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2367273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2368273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2369273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2370273d9f13SBarry Smith submatrix (same for all local rows). 2371273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2372273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2373273d9f13SBarry Smith each block row) or PETSC_NULL. 2374273d9f13SBarry Smith 237549a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 2376273d9f13SBarry Smith 2377273d9f13SBarry Smith Options Database Keys: 2378273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2379273d9f13SBarry Smith block calculations (much slower) 2380273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2381273d9f13SBarry Smith 2382273d9f13SBarry Smith Notes: 2383273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2384273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2385273d9f13SBarry Smith 2386273d9f13SBarry Smith Storage Information: 2387273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2388273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2389273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2390273d9f13SBarry Smith local matrix (a rectangular submatrix). 2391273d9f13SBarry Smith 2392273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2393273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2394273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2395273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2396273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2397273d9f13SBarry Smith 2398273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2399273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2400273d9f13SBarry Smith 2401273d9f13SBarry Smith .vb 2402273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2403273d9f13SBarry Smith ------------------- 2404273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2405273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2406273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2407273d9f13SBarry Smith ------------------- 2408273d9f13SBarry Smith .ve 2409273d9f13SBarry Smith 2410273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2411273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2412273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2413273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2414273d9f13SBarry Smith 2415273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2416273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2417273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2418273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2419273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2420273d9f13SBarry Smith matrices. 2421273d9f13SBarry Smith 2422273d9f13SBarry Smith Level: intermediate 2423273d9f13SBarry Smith 2424273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2425273d9f13SBarry Smith 2426aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2427273d9f13SBarry Smith @*/ 2428be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2429273d9f13SBarry Smith { 2430b24ad042SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2431273d9f13SBarry Smith 2432273d9f13SBarry Smith PetscFunctionBegin; 2433a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2434a23d5eceSKris Buschelman if (f) { 2435a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2436273d9f13SBarry Smith } 2437273d9f13SBarry Smith PetscFunctionReturn(0); 2438273d9f13SBarry Smith } 2439273d9f13SBarry Smith 24404a2ae208SSatish Balay #undef __FUNCT__ 24414a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 244279bdfe76SSatish Balay /*@C 244379bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 244479bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 244579bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 244679bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 244779bdfe76SSatish Balay performance can be increased by more than a factor of 50. 244879bdfe76SSatish Balay 2449db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2450db81eaa0SLois Curfman McInnes 245179bdfe76SSatish Balay Input Parameters: 2452db81eaa0SLois Curfman McInnes + comm - MPI communicator 245379bdfe76SSatish Balay . bs - size of blockk 245479bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 245592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 245692e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 245792e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 245892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 245992e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2460be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2461be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 246247a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 246379bdfe76SSatish Balay submatrix (same for all local rows) 246447a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 246592e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2466db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 246747a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 246879bdfe76SSatish Balay submatrix (same for all local rows). 246947a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 247092e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 247192e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 247279bdfe76SSatish Balay 247379bdfe76SSatish Balay Output Parameter: 247479bdfe76SSatish Balay . A - the matrix 247579bdfe76SSatish Balay 2476db81eaa0SLois Curfman McInnes Options Database Keys: 2477db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2478db81eaa0SLois Curfman McInnes block calculations (much slower) 2479db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 24803ffaccefSLois Curfman McInnes 2481b259b22eSLois Curfman McInnes Notes: 248249a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 248349a6f317SBarry Smith 248447a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 248547a75d0bSBarry Smith 248679bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 248779bdfe76SSatish Balay (possibly both). 248879bdfe76SSatish Balay 2489be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2490be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2491be79a94dSBarry Smith 249279bdfe76SSatish Balay Storage Information: 249379bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 249479bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 249579bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 249679bdfe76SSatish Balay local matrix (a rectangular submatrix). 249779bdfe76SSatish Balay 249879bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 249979bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 250079bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 250179bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 250279bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 250379bdfe76SSatish Balay 250479bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 250579bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 250679bdfe76SSatish Balay 2507db81eaa0SLois Curfman McInnes .vb 2508db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2509db81eaa0SLois Curfman McInnes ------------------- 2510db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2511db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2512db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2513db81eaa0SLois Curfman McInnes ------------------- 2514db81eaa0SLois Curfman McInnes .ve 251579bdfe76SSatish Balay 251679bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 251779bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 251879bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 251957b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 252079bdfe76SSatish Balay 2521d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2522d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 252379bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 252492e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 252592e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 25266da5968aSLois Curfman McInnes matrices. 252779bdfe76SSatish Balay 2528027ccd11SLois Curfman McInnes Level: intermediate 2529027ccd11SLois Curfman McInnes 253092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 253179bdfe76SSatish Balay 2532aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 253379bdfe76SSatish Balay @*/ 2534be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 253579bdfe76SSatish Balay { 25366849ba73SBarry Smith PetscErrorCode ierr; 2537b24ad042SBarry Smith PetscMPIInt size; 253879bdfe76SSatish Balay 2539d64ed03dSBarry Smith PetscFunctionBegin; 2540f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2541f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2542d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2543273d9f13SBarry Smith if (size > 1) { 2544273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2545273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2546273d9f13SBarry Smith } else { 2547273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2548273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 25493914022bSBarry Smith } 25503a40ed3dSBarry Smith PetscFunctionReturn(0); 255179bdfe76SSatish Balay } 2552026e39d0SSatish Balay 25534a2ae208SSatish Balay #undef __FUNCT__ 25544a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 25556849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 25560ac07820SSatish Balay { 25570ac07820SSatish Balay Mat mat; 25580ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2559dfbe8321SBarry Smith PetscErrorCode ierr; 2560b24ad042SBarry Smith PetscInt len=0; 25610ac07820SSatish Balay 2562d64ed03dSBarry Smith PetscFunctionBegin; 25630ac07820SSatish Balay *newmat = 0; 2564f69a0ea3SMatthew Knepley ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2565f69a0ea3SMatthew Knepley ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 2566be5d1d56SKris Buschelman ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 25671d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 25687fff6886SHong Zhang 25694beb1cfeSHong Zhang mat->factor = matin->factor; 2570273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 25710ac07820SSatish Balay mat->assembled = PETSC_TRUE; 25727fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 25737fff6886SHong Zhang 2574273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 2575521d7252SBarry Smith mat->bs = matin->bs; 25760ac07820SSatish Balay a->bs2 = oldmat->bs2; 25770ac07820SSatish Balay a->mbs = oldmat->mbs; 25780ac07820SSatish Balay a->nbs = oldmat->nbs; 25790ac07820SSatish Balay a->Mbs = oldmat->Mbs; 25800ac07820SSatish Balay a->Nbs = oldmat->Nbs; 25810ac07820SSatish Balay 25820ac07820SSatish Balay a->rstart = oldmat->rstart; 25830ac07820SSatish Balay a->rend = oldmat->rend; 25840ac07820SSatish Balay a->cstart = oldmat->cstart; 25850ac07820SSatish Balay a->cend = oldmat->cend; 25860ac07820SSatish Balay a->size = oldmat->size; 25870ac07820SSatish Balay a->rank = oldmat->rank; 2588aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2589aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2590aef5e8e0SSatish Balay a->rowindices = 0; 25910ac07820SSatish Balay a->rowvalues = 0; 25920ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 259330793edcSSatish Balay a->barray = 0; 25943123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 25953123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 25963123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 25973123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 25980ac07820SSatish Balay 2599133cdb44SSatish Balay /* hash table stuff */ 2600133cdb44SSatish Balay a->ht = 0; 2601133cdb44SSatish Balay a->hd = 0; 2602133cdb44SSatish Balay a->ht_size = 0; 2603133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 260425fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2605133cdb44SSatish Balay a->ht_total_ct = 0; 2606133cdb44SSatish Balay a->ht_insert_ct = 0; 2607133cdb44SSatish Balay 2608b24ad042SBarry Smith ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 26098798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2610521d7252SBarry Smith ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 26110ac07820SSatish Balay if (oldmat->colmap) { 2612aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 26130f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 261448e59246SSatish Balay #else 2615b24ad042SBarry Smith ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 261652e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2617b24ad042SBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 261848e59246SSatish Balay #endif 26190ac07820SSatish Balay } else a->colmap = 0; 26204beb1cfeSHong Zhang 26210ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2622b24ad042SBarry Smith ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 262352e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2624b24ad042SBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 26250ac07820SSatish Balay } else a->garray = 0; 26260ac07820SSatish Balay 26270ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 262852e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 26290ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 263052e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 26317fff6886SHong Zhang 26322e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 263352e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 26342e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 263552e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2636b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 26370ac07820SSatish Balay *newmat = mat; 26384beb1cfeSHong Zhang 26393a40ed3dSBarry Smith PetscFunctionReturn(0); 26400ac07820SSatish Balay } 264157b952d6SSatish Balay 2642e090d566SSatish Balay #include "petscsys.h" 264357b952d6SSatish Balay 26444a2ae208SSatish Balay #undef __FUNCT__ 26454a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2646f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 264757b952d6SSatish Balay { 264857b952d6SSatish Balay Mat A; 26496849ba73SBarry Smith PetscErrorCode ierr; 2650b24ad042SBarry Smith int fd; 2651b24ad042SBarry Smith PetscInt i,nz,j,rstart,rend; 265287828ca2SBarry Smith PetscScalar *vals,*buf; 265357b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 265457b952d6SSatish Balay MPI_Status status; 2655b24ad042SBarry Smith PetscMPIInt rank,size,maxnz; 2656b24ad042SBarry Smith PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2657dc231df0SBarry Smith PetscInt *locrowlens,*procsnz = 0,*browners; 2658167e7480SBarry Smith PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2659dc231df0SBarry Smith PetscMPIInt tag = ((PetscObject)viewer)->tag; 2660b24ad042SBarry Smith PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2661dc231df0SBarry Smith PetscInt dcount,kmax,k,nzcount,tmp,mend; 266257b952d6SSatish Balay 2663d64ed03dSBarry Smith PetscFunctionBegin; 2664b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 266557b952d6SSatish Balay 2666d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2667d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 266857b952d6SSatish Balay if (!rank) { 2669b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2670e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2671552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 26726c5fab8fSBarry Smith } 2673d64ed03dSBarry Smith 2674b24ad042SBarry Smith ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 267557b952d6SSatish Balay M = header[1]; N = header[2]; 267657b952d6SSatish Balay 267729bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 267857b952d6SSatish Balay 267957b952d6SSatish Balay /* 268057b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 268157b952d6SSatish Balay divisible by the blocksize 268257b952d6SSatish Balay */ 268357b952d6SSatish Balay Mbs = M/bs; 2684dc231df0SBarry Smith extra_rows = bs - M + bs*Mbs; 268557b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 268657b952d6SSatish Balay else Mbs++; 268757b952d6SSatish Balay if (extra_rows && !rank) { 268863ba0a88SBarry Smith ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 268957b952d6SSatish Balay } 2690537820f0SBarry Smith 269157b952d6SSatish Balay /* determine ownership of all rows */ 269257b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 269357b952d6SSatish Balay m = mbs*bs; 2694dc231df0SBarry Smith ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2695b24ad042SBarry Smith ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2696167e7480SBarry Smith 2697167e7480SBarry Smith /* process 0 needs enough room for process with most rows */ 2698167e7480SBarry Smith if (!rank) { 2699167e7480SBarry Smith mmax = rowners[1]; 2700167e7480SBarry Smith for (i=2; i<size; i++) { 2701167e7480SBarry Smith mmax = PetscMax(mmax,rowners[i]); 2702167e7480SBarry Smith } 2703ca02efcfSSatish Balay mmax*=bs; 2704167e7480SBarry Smith } else mmax = m; 2705167e7480SBarry Smith 270657b952d6SSatish Balay rowners[0] = 0; 2707cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2708cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 270957b952d6SSatish Balay rstart = rowners[rank]; 271057b952d6SSatish Balay rend = rowners[rank+1]; 271157b952d6SSatish Balay 271257b952d6SSatish Balay /* distribute row lengths to all processors */ 271319c38ff2SBarry Smith ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 271457b952d6SSatish Balay if (!rank) { 2715dc231df0SBarry Smith mend = m; 2716dc231df0SBarry Smith if (size == 1) mend = mend - extra_rows; 2717dc231df0SBarry Smith ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2718dc231df0SBarry Smith for (j=mend; j<m; j++) locrowlens[j] = 1; 2719dc231df0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2720b24ad042SBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2721b24ad042SBarry Smith ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2722dc231df0SBarry Smith for (j=0; j<m; j++) { 2723dc231df0SBarry Smith procsnz[0] += locrowlens[j]; 2724dc231df0SBarry Smith } 2725dc231df0SBarry Smith for (i=1; i<size; i++) { 2726dc231df0SBarry Smith mend = browners[i+1] - browners[i]; 2727dc231df0SBarry Smith if (i == size-1) mend = mend - extra_rows; 2728dc231df0SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2729dc231df0SBarry Smith for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2730dc231df0SBarry Smith /* calculate the number of nonzeros on each processor */ 2731dc231df0SBarry Smith for (j=0; j<browners[i+1]-browners[i]; j++) { 273257b952d6SSatish Balay procsnz[i] += rowlengths[j]; 273357b952d6SSatish Balay } 2734dc231df0SBarry Smith ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 273557b952d6SSatish Balay } 2736606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2737dc231df0SBarry Smith } else { 2738dc231df0SBarry Smith ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2739dc231df0SBarry Smith } 274057b952d6SSatish Balay 2741dc231df0SBarry Smith if (!rank) { 274257b952d6SSatish Balay /* determine max buffer needed and allocate it */ 27438a8e0b3aSBarry Smith maxnz = procsnz[0]; 2744cdc0ba36SBarry Smith for (i=1; i<size; i++) { 274557b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 274657b952d6SSatish Balay } 2747b24ad042SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 274857b952d6SSatish Balay 274957b952d6SSatish Balay /* read in my part of the matrix column indices */ 275057b952d6SSatish Balay nz = procsnz[0]; 275119c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 275257b952d6SSatish Balay mycols = ibuf; 2753cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2754e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2755cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2756cee3aa6bSSatish Balay 275757b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 275857b952d6SSatish Balay for (i=1; i<size-1; i++) { 275957b952d6SSatish Balay nz = procsnz[i]; 2760e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2761b24ad042SBarry Smith ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276257b952d6SSatish Balay } 276357b952d6SSatish Balay /* read in the stuff for the last proc */ 276457b952d6SSatish Balay if (size != 1) { 276557b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2766e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 276757b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2768b24ad042SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 276957b952d6SSatish Balay } 2770606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2771d64ed03dSBarry Smith } else { 277257b952d6SSatish Balay /* determine buffer space needed for message */ 277357b952d6SSatish Balay nz = 0; 277457b952d6SSatish Balay for (i=0; i<m; i++) { 277557b952d6SSatish Balay nz += locrowlens[i]; 277657b952d6SSatish Balay } 277719c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 277857b952d6SSatish Balay mycols = ibuf; 277957b952d6SSatish Balay /* receive message of column indices*/ 2780b24ad042SBarry Smith ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2781b24ad042SBarry Smith ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 278229bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 278357b952d6SSatish Balay } 278457b952d6SSatish Balay 278557b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2786dc231df0SBarry Smith ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2787dc231df0SBarry Smith ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2788dc231df0SBarry Smith ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2789dc231df0SBarry Smith ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2790dc231df0SBarry Smith ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 279157b952d6SSatish Balay rowcount = 0; nzcount = 0; 279257b952d6SSatish Balay for (i=0; i<mbs; i++) { 279357b952d6SSatish Balay dcount = 0; 279457b952d6SSatish Balay odcount = 0; 279557b952d6SSatish Balay for (j=0; j<bs; j++) { 279657b952d6SSatish Balay kmax = locrowlens[rowcount]; 279757b952d6SSatish Balay for (k=0; k<kmax; k++) { 279857b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 279957b952d6SSatish Balay if (!mask[tmp]) { 280057b952d6SSatish Balay mask[tmp] = 1; 280157b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 280257b952d6SSatish Balay else masked1[dcount++] = tmp; 280357b952d6SSatish Balay } 280457b952d6SSatish Balay } 280557b952d6SSatish Balay rowcount++; 280657b952d6SSatish Balay } 2807cee3aa6bSSatish Balay 280857b952d6SSatish Balay dlens[i] = dcount; 280957b952d6SSatish Balay odlens[i] = odcount; 2810cee3aa6bSSatish Balay 281157b952d6SSatish Balay /* zero out the mask elements we set */ 281257b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 281357b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 281457b952d6SSatish Balay } 2815cee3aa6bSSatish Balay 281657b952d6SSatish Balay /* create our matrix */ 2817f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2818f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 281978ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 282078ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 282178ae41b4SKris Buschelman 282278ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2823dc231df0SBarry Smith ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 282457b952d6SSatish Balay 282557b952d6SSatish Balay if (!rank) { 282619c38ff2SBarry Smith ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 282757b952d6SSatish Balay /* read in my part of the matrix numerical values */ 282857b952d6SSatish Balay nz = procsnz[0]; 282957b952d6SSatish Balay vals = buf; 2830cee3aa6bSSatish Balay mycols = ibuf; 2831cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2832e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2833cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2834537820f0SBarry Smith 283557b952d6SSatish Balay /* insert into matrix */ 283657b952d6SSatish Balay jj = rstart*bs; 283757b952d6SSatish Balay for (i=0; i<m; i++) { 2838dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 283957b952d6SSatish Balay mycols += locrowlens[i]; 284057b952d6SSatish Balay vals += locrowlens[i]; 284157b952d6SSatish Balay jj++; 284257b952d6SSatish Balay } 284357b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 284457b952d6SSatish Balay for (i=1; i<size-1; i++) { 284557b952d6SSatish Balay nz = procsnz[i]; 284657b952d6SSatish Balay vals = buf; 2847e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2848ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 284957b952d6SSatish Balay } 285057b952d6SSatish Balay /* the last proc */ 285157b952d6SSatish Balay if (size != 1){ 285257b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2853cee3aa6bSSatish Balay vals = buf; 2854e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 285557b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2856ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 285757b952d6SSatish Balay } 2858606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2859d64ed03dSBarry Smith } else { 286057b952d6SSatish Balay /* receive numeric values */ 286119c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 286257b952d6SSatish Balay 286357b952d6SSatish Balay /* receive message of values*/ 286457b952d6SSatish Balay vals = buf; 2865cee3aa6bSSatish Balay mycols = ibuf; 2866ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2867ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 286829bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 286957b952d6SSatish Balay 287057b952d6SSatish Balay /* insert into matrix */ 287157b952d6SSatish Balay jj = rstart*bs; 2872cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2873dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 287457b952d6SSatish Balay mycols += locrowlens[i]; 287557b952d6SSatish Balay vals += locrowlens[i]; 287657b952d6SSatish Balay jj++; 287757b952d6SSatish Balay } 287857b952d6SSatish Balay } 2879606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2880606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2881606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2882dc231df0SBarry Smith ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2883dc231df0SBarry Smith ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2884dc231df0SBarry Smith ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 28856d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28866d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288778ae41b4SKris Buschelman 288878ae41b4SKris Buschelman *newmat = A; 28893a40ed3dSBarry Smith PetscFunctionReturn(0); 289057b952d6SSatish Balay } 289157b952d6SSatish Balay 28924a2ae208SSatish Balay #undef __FUNCT__ 28934a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2894133cdb44SSatish Balay /*@ 2895133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2896133cdb44SSatish Balay 2897133cdb44SSatish Balay Input Parameters: 2898133cdb44SSatish Balay . mat - the matrix 2899133cdb44SSatish Balay . fact - factor 2900133cdb44SSatish Balay 2901fee21e36SBarry Smith Collective on Mat 2902fee21e36SBarry Smith 29038c890885SBarry Smith Level: advanced 29048c890885SBarry Smith 2905133cdb44SSatish Balay Notes: 2906133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2907133cdb44SSatish Balay 2908133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2909133cdb44SSatish Balay 2910133cdb44SSatish Balay .seealso: MatSetOption() 2911133cdb44SSatish Balay @*/ 2912be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2913133cdb44SSatish Balay { 2914dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscReal); 29155bf65638SKris Buschelman 29165bf65638SKris Buschelman PetscFunctionBegin; 29175bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 29185bf65638SKris Buschelman if (f) { 29195bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 29205bf65638SKris Buschelman } 29215bf65638SKris Buschelman PetscFunctionReturn(0); 29225bf65638SKris Buschelman } 29235bf65638SKris Buschelman 2924be1d678aSKris Buschelman EXTERN_C_BEGIN 29255bf65638SKris Buschelman #undef __FUNCT__ 29265bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2927be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 29285bf65638SKris Buschelman { 292925fdafccSSatish Balay Mat_MPIBAIJ *baij; 2930133cdb44SSatish Balay 2931133cdb44SSatish Balay PetscFunctionBegin; 2932133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2933133cdb44SSatish Balay baij->ht_fact = fact; 2934133cdb44SSatish Balay PetscFunctionReturn(0); 2935133cdb44SSatish Balay } 2936be1d678aSKris Buschelman EXTERN_C_END 2937f2a5309cSSatish Balay 29384a2ae208SSatish Balay #undef __FUNCT__ 29394a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2940be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2941f2a5309cSSatish Balay { 2942f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2943f2a5309cSSatish Balay PetscFunctionBegin; 2944f2a5309cSSatish Balay *Ad = a->A; 2945f2a5309cSSatish Balay *Ao = a->B; 2946195d93cdSBarry Smith *colmap = a->garray; 2947f2a5309cSSatish Balay PetscFunctionReturn(0); 2948f2a5309cSSatish Balay } 2949