1be1d678aSKris Buschelman #define PETSCMAT_DLL 279bdfe76SSatish Balay 3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 479bdfe76SSatish Balay 5dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 1593fea6afSBarry Smith 1693fea6afSBarry Smith /* UGLY, ugly, ugly 1787828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 1893fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 1993fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2093fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2193fea6afSBarry Smith into the single precision data structures. 2293fea6afSBarry Smith */ 2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 24b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 2993fea6afSBarry Smith #else 306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3593fea6afSBarry Smith #endif 363b2fbd54SBarry Smith 374a2ae208SSatish Balay #undef __FUNCT__ 38985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 39985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 407843d17aSBarry Smith { 417843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42dfbe8321SBarry Smith PetscErrorCode ierr; 43985db425SBarry Smith PetscInt i,*idxb = 0; 4487828ca2SBarry Smith PetscScalar *va,*vb; 457843d17aSBarry Smith Vec vtmp; 467843d17aSBarry Smith 477843d17aSBarry Smith PetscFunctionBegin; 487843d17aSBarry Smith 49985db425SBarry Smith ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 501ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51985db425SBarry Smith if (idx) { 52ff6e566aSBarry Smith for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;} 53985db425SBarry Smith } 547843d17aSBarry Smith 55899cda47SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 56985db425SBarry Smith if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 57985db425SBarry Smith ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 581ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 597843d17aSBarry Smith 60899cda47SBarry Smith for (i=0; i<A->rmap.n; i++){ 61985db425SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);} 627843d17aSBarry Smith } 637843d17aSBarry Smith 641ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 651ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 66985db425SBarry Smith if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 67ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 687843d17aSBarry Smith PetscFunctionReturn(0); 697843d17aSBarry Smith } 707843d17aSBarry Smith 717fc3c18eSBarry Smith EXTERN_C_BEGIN 724a2ae208SSatish Balay #undef __FUNCT__ 734a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 74be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 757fc3c18eSBarry Smith { 767fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 77dfbe8321SBarry Smith PetscErrorCode ierr; 787fc3c18eSBarry Smith 797fc3c18eSBarry Smith PetscFunctionBegin; 807fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 817fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 827fc3c18eSBarry Smith PetscFunctionReturn(0); 837fc3c18eSBarry Smith } 847fc3c18eSBarry Smith EXTERN_C_END 857fc3c18eSBarry Smith 867fc3c18eSBarry Smith EXTERN_C_BEGIN 874a2ae208SSatish Balay #undef __FUNCT__ 884a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 89be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 907fc3c18eSBarry Smith { 917fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 92dfbe8321SBarry Smith PetscErrorCode ierr; 937fc3c18eSBarry Smith 947fc3c18eSBarry Smith PetscFunctionBegin; 957fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 967fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 977fc3c18eSBarry Smith PetscFunctionReturn(0); 987fc3c18eSBarry Smith } 997fc3c18eSBarry Smith EXTERN_C_END 1007fc3c18eSBarry Smith 101537820f0SBarry Smith /* 102537820f0SBarry Smith Local utility routine that creates a mapping from the global column 10357b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10457b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10557b952d6SSatish Balay length of colmap equals the global matrix length. 10657b952d6SSatish Balay */ 1074a2ae208SSatish Balay #undef __FUNCT__ 1084a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 109dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 11057b952d6SSatish Balay { 11157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 11257b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 1136849ba73SBarry Smith PetscErrorCode ierr; 114899cda47SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 11557b952d6SSatish Balay 116d64ed03dSBarry Smith PetscFunctionBegin; 117aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 118f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11948e59246SSatish Balay for (i=0; i<nbs; i++){ 1200f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 12148e59246SSatish Balay } 12248e59246SSatish Balay #else 123b24ad042SBarry Smith ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 12452e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 125b24ad042SBarry Smith ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 126928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12748e59246SSatish Balay #endif 1283a40ed3dSBarry Smith PetscFunctionReturn(0); 12957b952d6SSatish Balay } 13057b952d6SSatish Balay 13180c1aa95SSatish Balay #define CHUNKSIZE 10 13280c1aa95SSatish Balay 133f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13480c1aa95SSatish Balay { \ 13580c1aa95SSatish Balay \ 13680c1aa95SSatish Balay brow = row/bs; \ 13780c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 138ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13980c1aa95SSatish Balay bcol = col/bs; \ 14080c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 141ab26458aSBarry Smith low = 0; high = nrow; \ 142ab26458aSBarry Smith while (high-low > 3) { \ 143ab26458aSBarry Smith t = (low+high)/2; \ 144ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 145ab26458aSBarry Smith else low = t; \ 146ab26458aSBarry Smith } \ 147ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14880c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14980c1aa95SSatish Balay if (rp[_i] == bcol) { \ 15080c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 151eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 152eada6651SSatish Balay else *bap = value; \ 153ac7a638eSSatish Balay goto a_noinsert; \ 15480c1aa95SSatish Balay } \ 15580c1aa95SSatish Balay } \ 15689280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 157085a36d4SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 158421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 15980c1aa95SSatish Balay N = nrow++ - 1; \ 16080c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 16180c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 16280c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1633eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 16480c1aa95SSatish Balay } \ 1653eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 16680c1aa95SSatish Balay rp[_i] = bcol; \ 16780c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 168ac7a638eSSatish Balay a_noinsert:; \ 16980c1aa95SSatish Balay ailen[brow] = nrow; \ 17080c1aa95SSatish Balay } 17157b952d6SSatish Balay 172ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 173ac7a638eSSatish Balay { \ 174ac7a638eSSatish Balay brow = row/bs; \ 175ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 177ac7a638eSSatish Balay bcol = col/bs; \ 178ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 179ac7a638eSSatish Balay low = 0; high = nrow; \ 180ac7a638eSSatish Balay while (high-low > 3) { \ 181ac7a638eSSatish Balay t = (low+high)/2; \ 182ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 183ac7a638eSSatish Balay else low = t; \ 184ac7a638eSSatish Balay } \ 185ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 186ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 187ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 188ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 189ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 190ac7a638eSSatish Balay else *bap = value; \ 191ac7a638eSSatish Balay goto b_noinsert; \ 192ac7a638eSSatish Balay } \ 193ac7a638eSSatish Balay } \ 19489280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 195085a36d4SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 196421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 197085a36d4SBarry Smith CHKMEMQ;\ 198ac7a638eSSatish Balay N = nrow++ - 1; \ 199ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 200ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 201ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2023eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 203ac7a638eSSatish Balay } \ 2043eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 205ac7a638eSSatish Balay rp[_i] = bcol; \ 206ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 207ac7a638eSSatish Balay b_noinsert:; \ 208ac7a638eSSatish Balay bilen[brow] = nrow; \ 209ac7a638eSSatish Balay } 210ac7a638eSSatish Balay 21193fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2124a2ae208SSatish Balay #undef __FUNCT__ 2134a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 214b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 21557b952d6SSatish Balay { 2166fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 217dfbe8321SBarry Smith PetscErrorCode ierr; 218b24ad042SBarry Smith PetscInt i,N = m*n; 2196fa18ffdSBarry Smith MatScalar *vsingle; 22093fea6afSBarry Smith 22193fea6afSBarry Smith PetscFunctionBegin; 2226fa18ffdSBarry Smith if (N > b->setvalueslen) { 22305b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 22482502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2256fa18ffdSBarry Smith b->setvalueslen = N; 2266fa18ffdSBarry Smith } 2276fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2286fa18ffdSBarry Smith 22993fea6afSBarry Smith for (i=0; i<N; i++) { 23093fea6afSBarry Smith vsingle[i] = v[i]; 23193fea6afSBarry Smith } 23293fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 23393fea6afSBarry Smith PetscFunctionReturn(0); 23493fea6afSBarry Smith } 23593fea6afSBarry Smith 2364a2ae208SSatish Balay #undef __FUNCT__ 2374a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 238b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 23993fea6afSBarry Smith { 2406fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 241dfbe8321SBarry Smith PetscErrorCode ierr; 242b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2436fa18ffdSBarry Smith MatScalar *vsingle; 24493fea6afSBarry Smith 24593fea6afSBarry Smith PetscFunctionBegin; 2466fa18ffdSBarry Smith if (N > b->setvalueslen) { 24705b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 24882502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2496fa18ffdSBarry Smith b->setvalueslen = N; 2506fa18ffdSBarry Smith } 2516fa18ffdSBarry Smith vsingle = b->setvaluescopy; 25293fea6afSBarry Smith for (i=0; i<N; i++) { 25393fea6afSBarry Smith vsingle[i] = v[i]; 25493fea6afSBarry Smith } 25593fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 25693fea6afSBarry Smith PetscFunctionReturn(0); 25793fea6afSBarry Smith } 2586fa18ffdSBarry Smith 2594a2ae208SSatish Balay #undef __FUNCT__ 2604a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 261b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2626fa18ffdSBarry Smith { 2636fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 264dfbe8321SBarry Smith PetscErrorCode ierr; 265b24ad042SBarry Smith PetscInt i,N = m*n; 2666fa18ffdSBarry Smith MatScalar *vsingle; 2676fa18ffdSBarry Smith 2686fa18ffdSBarry Smith PetscFunctionBegin; 2696fa18ffdSBarry Smith if (N > b->setvalueslen) { 27005b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 27182502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2726fa18ffdSBarry Smith b->setvalueslen = N; 2736fa18ffdSBarry Smith } 2746fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2756fa18ffdSBarry Smith for (i=0; i<N; i++) { 2766fa18ffdSBarry Smith vsingle[i] = v[i]; 2776fa18ffdSBarry Smith } 2786fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 2796fa18ffdSBarry Smith PetscFunctionReturn(0); 2806fa18ffdSBarry Smith } 2816fa18ffdSBarry Smith 2824a2ae208SSatish Balay #undef __FUNCT__ 2834a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 284b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 2856fa18ffdSBarry Smith { 2866fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 287dfbe8321SBarry Smith PetscErrorCode ierr; 288b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 2896fa18ffdSBarry Smith MatScalar *vsingle; 2906fa18ffdSBarry Smith 2916fa18ffdSBarry Smith PetscFunctionBegin; 2926fa18ffdSBarry Smith if (N > b->setvalueslen) { 29305b42c5fSBarry Smith ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 29482502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2956fa18ffdSBarry Smith b->setvalueslen = N; 2966fa18ffdSBarry Smith } 2976fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2986fa18ffdSBarry Smith for (i=0; i<N; i++) { 2996fa18ffdSBarry Smith vsingle[i] = v[i]; 3006fa18ffdSBarry Smith } 3016fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3026fa18ffdSBarry Smith PetscFunctionReturn(0); 3036fa18ffdSBarry Smith } 30493fea6afSBarry Smith #endif 30593fea6afSBarry Smith 3064a2ae208SSatish Balay #undef __FUNCT__ 307e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 308b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 30993fea6afSBarry Smith { 31057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 31193fea6afSBarry Smith MatScalar value; 312273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 313dfbe8321SBarry Smith PetscErrorCode ierr; 314b24ad042SBarry Smith PetscInt i,j,row,col; 315899cda47SBarry Smith PetscInt rstart_orig=mat->rmap.rstart; 316899cda47SBarry Smith PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 317899cda47SBarry Smith PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 31857b952d6SSatish Balay 319eada6651SSatish Balay /* Some Variables required in the macro */ 32080c1aa95SSatish Balay Mat A = baij->A; 32180c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 322b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3233eda8832SBarry Smith MatScalar *aa=a->a; 324ac7a638eSSatish Balay 325ac7a638eSSatish Balay Mat B = baij->B; 326ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 327b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3283eda8832SBarry Smith MatScalar *ba=b->a; 329ac7a638eSSatish Balay 330b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 331b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 3323eda8832SBarry Smith MatScalar *ap,*bap; 33380c1aa95SSatish Balay 334d64ed03dSBarry Smith PetscFunctionBegin; 33557b952d6SSatish Balay for (i=0; i<m; i++) { 3365ef9f2a5SBarry Smith if (im[i] < 0) continue; 3372515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 338899cda47SBarry Smith if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 339639f9d9dSBarry Smith #endif 34057b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 34157b952d6SSatish Balay row = im[i] - rstart_orig; 34257b952d6SSatish Balay for (j=0; j<n; j++) { 34357b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 34457b952d6SSatish Balay col = in[j] - cstart_orig; 34557b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 346f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 34780c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 34873959e64SBarry Smith } else if (in[j] < 0) continue; 3492515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 350899cda47SBarry Smith else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 351639f9d9dSBarry Smith #endif 35257b952d6SSatish Balay else { 35357b952d6SSatish Balay if (mat->was_assembled) { 354905e6a2fSBarry Smith if (!baij->colmap) { 355905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 356905e6a2fSBarry Smith } 357aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 3580f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 359bba1ac68SSatish Balay col = col - 1; 36048e59246SSatish Balay #else 361bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 36248e59246SSatish Balay #endif 36357b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 36457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3658295de27SSatish Balay col = in[j]; 3669bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 3679bf004c3SSatish Balay B = baij->B; 3689bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 3699bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 3709bf004c3SSatish Balay ba=b->a; 371bba1ac68SSatish Balay } else col += in[j]%bs; 3728295de27SSatish Balay } else col = in[j]; 37357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 37490da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 37590da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 37657b952d6SSatish Balay } 37757b952d6SSatish Balay } 378d64ed03dSBarry Smith } else { 37990f02eecSBarry Smith if (!baij->donotstash) { 380ff2fd236SBarry Smith if (roworiented) { 3816fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 382ff2fd236SBarry Smith } else { 3836fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 38457b952d6SSatish Balay } 38557b952d6SSatish Balay } 38657b952d6SSatish Balay } 38790f02eecSBarry Smith } 3883a40ed3dSBarry Smith PetscFunctionReturn(0); 38957b952d6SSatish Balay } 39057b952d6SSatish Balay 3914a2ae208SSatish Balay #undef __FUNCT__ 3924a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 393b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 394ab26458aSBarry Smith { 395ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 396f15d580aSBarry Smith const MatScalar *value; 397f15d580aSBarry Smith MatScalar *barray=baij->barray; 398273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 399dfbe8321SBarry Smith PetscErrorCode ierr; 400899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 401899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 402899cda47SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 403ab26458aSBarry Smith 404b16ae2b1SBarry Smith PetscFunctionBegin; 40530793edcSSatish Balay if(!barray) { 40682502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 40782502324SSatish Balay baij->barray = barray; 40830793edcSSatish Balay } 40930793edcSSatish Balay 410ab26458aSBarry Smith if (roworiented) { 411ab26458aSBarry Smith stepval = (n-1)*bs; 412ab26458aSBarry Smith } else { 413ab26458aSBarry Smith stepval = (m-1)*bs; 414ab26458aSBarry Smith } 415ab26458aSBarry Smith for (i=0; i<m; i++) { 4165ef9f2a5SBarry Smith if (im[i] < 0) continue; 4172515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 41877431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 419ab26458aSBarry Smith #endif 420ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 421ab26458aSBarry Smith row = im[i] - rstart; 422ab26458aSBarry Smith for (j=0; j<n; j++) { 42315b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 42415b57d14SSatish Balay if ((roworiented) && (n == 1)) { 425f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 42615b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 427f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 42815b57d14SSatish Balay } else { /* Here a copy is required */ 429ab26458aSBarry Smith if (roworiented) { 430ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 431ab26458aSBarry Smith } else { 432ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 433abef11f7SSatish Balay } 43447513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 43547513183SBarry Smith for (jj=0; jj<bs; jj++) { 43630793edcSSatish Balay *barray++ = *value++; 43747513183SBarry Smith } 43847513183SBarry Smith } 43930793edcSSatish Balay barray -=bs2; 44015b57d14SSatish Balay } 441abef11f7SSatish Balay 442abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 443abef11f7SSatish Balay col = in[j] - cstart; 44493fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 445ab26458aSBarry Smith } 4465ef9f2a5SBarry Smith else if (in[j] < 0) continue; 4472515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 44877431f27SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 449ab26458aSBarry Smith #endif 450ab26458aSBarry Smith else { 451ab26458aSBarry Smith if (mat->was_assembled) { 452ab26458aSBarry Smith if (!baij->colmap) { 453ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 454ab26458aSBarry Smith } 455a5eb4965SSatish Balay 4562515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 457aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 458b24ad042SBarry Smith { PetscInt data; 4590f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 46029bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461fa46199cSSatish Balay } 46248e59246SSatish Balay #else 46329bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 464a5eb4965SSatish Balay #endif 46548e59246SSatish Balay #endif 466aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4670f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 468fa46199cSSatish Balay col = (col - 1)/bs; 46948e59246SSatish Balay #else 470a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 47148e59246SSatish Balay #endif 472ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 473ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 474ab26458aSBarry Smith col = in[j]; 475ab26458aSBarry Smith } 476ab26458aSBarry Smith } 477ab26458aSBarry Smith else col = in[j]; 47893fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 479ab26458aSBarry Smith } 480ab26458aSBarry Smith } 481d64ed03dSBarry Smith } else { 482ab26458aSBarry Smith if (!baij->donotstash) { 483ff2fd236SBarry Smith if (roworiented) { 4846fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 485ff2fd236SBarry Smith } else { 4866fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 487ff2fd236SBarry Smith } 488abef11f7SSatish Balay } 489ab26458aSBarry Smith } 490ab26458aSBarry Smith } 4913a40ed3dSBarry Smith PetscFunctionReturn(0); 492ab26458aSBarry Smith } 4936fa18ffdSBarry Smith 4940bdbc534SSatish Balay #define HASH_KEY 0.6180339887 495b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 496b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 497b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 4984a2ae208SSatish Balay #undef __FUNCT__ 4994a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 500b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 5010bdbc534SSatish Balay { 5020bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 503273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 504dfbe8321SBarry Smith PetscErrorCode ierr; 505b24ad042SBarry Smith PetscInt i,j,row,col; 506899cda47SBarry Smith PetscInt rstart_orig=mat->rmap.rstart; 507899cda47SBarry Smith PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 508899cda47SBarry Smith PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 509329f5518SBarry Smith PetscReal tmp; 5103eda8832SBarry Smith MatScalar **HD = baij->hd,value; 5112515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 512b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5134a15367fSSatish Balay #endif 5140bdbc534SSatish Balay 5150bdbc534SSatish Balay PetscFunctionBegin; 5160bdbc534SSatish Balay 5170bdbc534SSatish Balay for (i=0; i<m; i++) { 5182515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 51929bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 520899cda47SBarry Smith if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 5210bdbc534SSatish Balay #endif 5220bdbc534SSatish Balay row = im[i]; 523c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5240bdbc534SSatish Balay for (j=0; j<n; j++) { 5250bdbc534SSatish Balay col = in[j]; 5266fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 527b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 528c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 529c2760754SSatish Balay h1 = HASH(size,key,tmp); 5300bdbc534SSatish Balay 531c2760754SSatish Balay 532c2760754SSatish Balay idx = h1; 5332515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 534187ce0cbSSatish Balay insert_ct++; 535187ce0cbSSatish Balay total_ct++; 536187ce0cbSSatish Balay if (HT[idx] != key) { 537187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 538187ce0cbSSatish Balay if (idx == size) { 539187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 540187ce0cbSSatish Balay if (idx == h1) { 54177431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 542187ce0cbSSatish Balay } 543187ce0cbSSatish Balay } 544187ce0cbSSatish Balay } 545187ce0cbSSatish Balay #else 546c2760754SSatish Balay if (HT[idx] != key) { 547c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 548c2760754SSatish Balay if (idx == size) { 549c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 550c2760754SSatish Balay if (idx == h1) { 55177431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 552c2760754SSatish Balay } 553c2760754SSatish Balay } 554c2760754SSatish Balay } 555187ce0cbSSatish Balay #endif 556c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 557c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 558c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5590bdbc534SSatish Balay } 5600bdbc534SSatish Balay } else { 5610bdbc534SSatish Balay if (!baij->donotstash) { 562ff2fd236SBarry Smith if (roworiented) { 5638798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 564ff2fd236SBarry Smith } else { 5658798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 5660bdbc534SSatish Balay } 5670bdbc534SSatish Balay } 5680bdbc534SSatish Balay } 5690bdbc534SSatish Balay } 5702515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 571187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 572187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 573187ce0cbSSatish Balay #endif 5740bdbc534SSatish Balay PetscFunctionReturn(0); 5750bdbc534SSatish Balay } 5760bdbc534SSatish Balay 5774a2ae208SSatish Balay #undef __FUNCT__ 5784a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 579b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 5800bdbc534SSatish Balay { 5810bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 583dfbe8321SBarry Smith PetscErrorCode ierr; 584b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col; 585899cda47SBarry Smith PetscInt rstart=baij->rstartbs; 586ab715e2cSSatish Balay PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2; 587b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 588329f5518SBarry Smith PetscReal tmp; 5893eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 590f15d580aSBarry Smith const MatScalar *v_t,*value; 5912515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 592b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5934a15367fSSatish Balay #endif 5940bdbc534SSatish Balay 595d0a41580SSatish Balay PetscFunctionBegin; 596d0a41580SSatish Balay 5970bdbc534SSatish Balay if (roworiented) { 5980bdbc534SSatish Balay stepval = (n-1)*bs; 5990bdbc534SSatish Balay } else { 6000bdbc534SSatish Balay stepval = (m-1)*bs; 6010bdbc534SSatish Balay } 6020bdbc534SSatish Balay for (i=0; i<m; i++) { 6032515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 60477431f27SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 60577431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 6060bdbc534SSatish Balay #endif 6070bdbc534SSatish Balay row = im[i]; 608ab715e2cSSatish Balay v_t = v + i*nbs2; 609c2760754SSatish Balay if (row >= rstart && row < rend) { 6100bdbc534SSatish Balay for (j=0; j<n; j++) { 6110bdbc534SSatish Balay col = in[j]; 6120bdbc534SSatish Balay 6130bdbc534SSatish Balay /* Look up into the Hash Table */ 614c2760754SSatish Balay key = row*Nbs+col+1; 615c2760754SSatish Balay h1 = HASH(size,key,tmp); 6160bdbc534SSatish Balay 617c2760754SSatish Balay idx = h1; 6182515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 619187ce0cbSSatish Balay total_ct++; 620187ce0cbSSatish Balay insert_ct++; 621187ce0cbSSatish Balay if (HT[idx] != key) { 622187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 623187ce0cbSSatish Balay if (idx == size) { 624187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 625187ce0cbSSatish Balay if (idx == h1) { 62677431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 627187ce0cbSSatish Balay } 628187ce0cbSSatish Balay } 629187ce0cbSSatish Balay } 630187ce0cbSSatish Balay #else 631c2760754SSatish Balay if (HT[idx] != key) { 632c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 633c2760754SSatish Balay if (idx == size) { 634c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 635c2760754SSatish Balay if (idx == h1) { 63677431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 637c2760754SSatish Balay } 638c2760754SSatish Balay } 639c2760754SSatish Balay } 640187ce0cbSSatish Balay #endif 641c2760754SSatish Balay baij_a = HD[idx]; 6420bdbc534SSatish Balay if (roworiented) { 643c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 644187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 645187ce0cbSSatish Balay value = v_t; 646187ce0cbSSatish Balay v_t += bs; 647fef45726SSatish Balay if (addv == ADD_VALUES) { 648c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 649c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 650fef45726SSatish Balay baij_a[jj] += *value++; 651b4cc0f5aSSatish Balay } 652b4cc0f5aSSatish Balay } 653fef45726SSatish Balay } else { 654c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 655c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 656fef45726SSatish Balay baij_a[jj] = *value++; 657fef45726SSatish Balay } 658fef45726SSatish Balay } 659fef45726SSatish Balay } 6600bdbc534SSatish Balay } else { 6610bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 662fef45726SSatish Balay if (addv == ADD_VALUES) { 663b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 6640bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 665fef45726SSatish Balay baij_a[jj] += *value++; 666fef45726SSatish Balay } 667fef45726SSatish Balay } 668fef45726SSatish Balay } else { 669fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 670fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 671fef45726SSatish Balay baij_a[jj] = *value++; 672fef45726SSatish Balay } 673b4cc0f5aSSatish Balay } 6740bdbc534SSatish Balay } 6750bdbc534SSatish Balay } 6760bdbc534SSatish Balay } 6770bdbc534SSatish Balay } else { 6780bdbc534SSatish Balay if (!baij->donotstash) { 6790bdbc534SSatish Balay if (roworiented) { 6808798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6810bdbc534SSatish Balay } else { 6828798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6830bdbc534SSatish Balay } 6840bdbc534SSatish Balay } 6850bdbc534SSatish Balay } 6860bdbc534SSatish Balay } 6872515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 688187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 689187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 690187ce0cbSSatish Balay #endif 6910bdbc534SSatish Balay PetscFunctionReturn(0); 6920bdbc534SSatish Balay } 693133cdb44SSatish Balay 6944a2ae208SSatish Balay #undef __FUNCT__ 6954a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 696b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 697d6de1c52SSatish Balay { 698d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6996849ba73SBarry Smith PetscErrorCode ierr; 700899cda47SBarry Smith PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 701899cda47SBarry Smith PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 702d6de1c52SSatish Balay 703133cdb44SSatish Balay PetscFunctionBegin; 704d6de1c52SSatish Balay for (i=0; i<m; i++) { 70597e567efSBarry Smith if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 706899cda47SBarry Smith if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 707d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 708d6de1c52SSatish Balay row = idxm[i] - bsrstart; 709d6de1c52SSatish Balay for (j=0; j<n; j++) { 71097e567efSBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 711899cda47SBarry Smith if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 712d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 713d6de1c52SSatish Balay col = idxn[j] - bscstart; 71498dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 715d64ed03dSBarry Smith } else { 716905e6a2fSBarry Smith if (!baij->colmap) { 717905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 718905e6a2fSBarry Smith } 719aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7200f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 721fa46199cSSatish Balay data --; 72248e59246SSatish Balay #else 72348e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 72448e59246SSatish Balay #endif 72548e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 726d9d09a02SSatish Balay else { 72748e59246SSatish Balay col = data + idxn[j]%bs; 72898dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 729d6de1c52SSatish Balay } 730d6de1c52SSatish Balay } 731d6de1c52SSatish Balay } 732d64ed03dSBarry Smith } else { 73329bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 734d6de1c52SSatish Balay } 735d6de1c52SSatish Balay } 7363a40ed3dSBarry Smith PetscFunctionReturn(0); 737d6de1c52SSatish Balay } 738d6de1c52SSatish Balay 7394a2ae208SSatish Balay #undef __FUNCT__ 7404a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 741dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 742d6de1c52SSatish Balay { 743d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 744d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 745dfbe8321SBarry Smith PetscErrorCode ierr; 746899cda47SBarry Smith PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 747329f5518SBarry Smith PetscReal sum = 0.0; 7483eda8832SBarry Smith MatScalar *v; 749d6de1c52SSatish Balay 750d64ed03dSBarry Smith PetscFunctionBegin; 751d6de1c52SSatish Balay if (baij->size == 1) { 752064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 753d6de1c52SSatish Balay } else { 754d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 755d6de1c52SSatish Balay v = amat->a; 7568a62d963SHong Zhang nz = amat->nz*bs2; 7578a62d963SHong Zhang for (i=0; i<nz; i++) { 758aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 759329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 760d6de1c52SSatish Balay #else 761d6de1c52SSatish Balay sum += (*v)*(*v); v++; 762d6de1c52SSatish Balay #endif 763d6de1c52SSatish Balay } 764d6de1c52SSatish Balay v = bmat->a; 7658a62d963SHong Zhang nz = bmat->nz*bs2; 7668a62d963SHong Zhang for (i=0; i<nz; i++) { 767aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 768329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 769d6de1c52SSatish Balay #else 770d6de1c52SSatish Balay sum += (*v)*(*v); v++; 771d6de1c52SSatish Balay #endif 772d6de1c52SSatish Balay } 773064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 774064f8208SBarry Smith *nrm = sqrt(*nrm); 7758a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 7768a62d963SHong Zhang PetscReal *tmp,*tmp2; 777899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 778899cda47SBarry Smith ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 779899cda47SBarry Smith tmp2 = tmp + mat->cmap.N; 780899cda47SBarry Smith ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 7818a62d963SHong Zhang v = amat->a; jj = amat->j; 7828a62d963SHong Zhang for (i=0; i<amat->nz; i++) { 7838a62d963SHong Zhang for (j=0; j<bs; j++){ 7848a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 7858a62d963SHong Zhang for (row=0; row<bs; row++){ 7868a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7878a62d963SHong Zhang } 7888a62d963SHong Zhang } 7898a62d963SHong Zhang jj++; 7908a62d963SHong Zhang } 7918a62d963SHong Zhang v = bmat->a; jj = bmat->j; 7928a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 7938a62d963SHong Zhang for (j=0; j<bs; j++){ 7948a62d963SHong Zhang col = bs*garray[*jj] + j; 7958a62d963SHong Zhang for (row=0; row<bs; row++){ 7968a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7978a62d963SHong Zhang } 7988a62d963SHong Zhang } 7998a62d963SHong Zhang jj++; 8008a62d963SHong Zhang } 801899cda47SBarry Smith ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 8028a62d963SHong Zhang *nrm = 0.0; 803899cda47SBarry Smith for (j=0; j<mat->cmap.N; j++) { 8048a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8058a62d963SHong Zhang } 8068a62d963SHong Zhang ierr = PetscFree(tmp);CHKERRQ(ierr); 8078a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 808577dd1f9SKris Buschelman PetscReal *sums; 809577dd1f9SKris Buschelman ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 8108a62d963SHong Zhang sum = 0.0; 8118a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 8128a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 8138a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 8148a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 8158a62d963SHong Zhang for (i=0; i<nz; i++) { 8168a62d963SHong Zhang for (col=0; col<bs; col++){ 8178a62d963SHong Zhang for (row=0; row<bs; row++){ 8188a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8198a62d963SHong Zhang } 8208a62d963SHong Zhang } 8218a62d963SHong Zhang } 8228a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 8238a62d963SHong Zhang nz = bmat->i[j+1]-bmat->i[j]; 8248a62d963SHong Zhang for (i=0; i<nz; i++) { 8258a62d963SHong Zhang for (col=0; col<bs; col++){ 8268a62d963SHong Zhang for (row=0; row<bs; row++){ 8278a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 8288a62d963SHong Zhang } 8298a62d963SHong Zhang } 8308a62d963SHong Zhang } 8318a62d963SHong Zhang for (row=0; row<bs; row++){ 8328a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 8338a62d963SHong Zhang } 8348a62d963SHong Zhang } 8358a62d963SHong Zhang ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 836577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 837d64ed03dSBarry Smith } else { 83829bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 839d6de1c52SSatish Balay } 840d64ed03dSBarry Smith } 8413a40ed3dSBarry Smith PetscFunctionReturn(0); 842d6de1c52SSatish Balay } 84357b952d6SSatish Balay 844fef45726SSatish Balay /* 845fef45726SSatish Balay Creates the hash table, and sets the table 846fef45726SSatish Balay This table is created only once. 847fef45726SSatish Balay If new entried need to be added to the matrix 848fef45726SSatish Balay then the hash table has to be destroyed and 849fef45726SSatish Balay recreated. 850fef45726SSatish Balay */ 8514a2ae208SSatish Balay #undef __FUNCT__ 8524a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 853dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 854596b8d2eSBarry Smith { 855596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 856596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 857596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 858b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8596849ba73SBarry Smith PetscErrorCode ierr; 860899cda47SBarry Smith PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 861899cda47SBarry Smith PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 862b24ad042SBarry Smith PetscInt *HT,key; 8633eda8832SBarry Smith MatScalar **HD; 864329f5518SBarry Smith PetscReal tmp; 8656cf91177SBarry Smith #if defined(PETSC_USE_INFO) 866b24ad042SBarry Smith PetscInt ct=0,max=0; 8674a15367fSSatish Balay #endif 868fef45726SSatish Balay 869d64ed03dSBarry Smith PetscFunctionBegin; 870b24ad042SBarry Smith baij->ht_size=(PetscInt)(factor*nz); 8710bdbc534SSatish Balay size = baij->ht_size; 872fef45726SSatish Balay 8730bdbc534SSatish Balay if (baij->ht) { 8740bdbc534SSatish Balay PetscFunctionReturn(0); 875596b8d2eSBarry Smith } 8760bdbc534SSatish Balay 877fef45726SSatish Balay /* Allocate Memory for Hash Table */ 878b24ad042SBarry Smith ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 879b24ad042SBarry Smith baij->ht = (PetscInt*)(baij->hd + size); 880b9e4cc15SSatish Balay HD = baij->hd; 881a07cd24cSSatish Balay HT = baij->ht; 882b9e4cc15SSatish Balay 883b9e4cc15SSatish Balay 884b24ad042SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8850bdbc534SSatish Balay 886596b8d2eSBarry Smith 887596b8d2eSBarry Smith /* Loop Over A */ 8880bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 889596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8900bdbc534SSatish Balay row = i+rstart; 8910bdbc534SSatish Balay col = aj[j]+cstart; 892596b8d2eSBarry Smith 893187ce0cbSSatish Balay key = row*Nbs + col + 1; 894c2760754SSatish Balay h1 = HASH(size,key,tmp); 8950bdbc534SSatish Balay for (k=0; k<size; k++){ 896958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 8970bdbc534SSatish Balay HT[(h1+k)%size] = key; 8980bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 899596b8d2eSBarry Smith break; 9006cf91177SBarry Smith #if defined(PETSC_USE_INFO) 901187ce0cbSSatish Balay } else { 902187ce0cbSSatish Balay ct++; 903187ce0cbSSatish Balay #endif 904596b8d2eSBarry Smith } 905187ce0cbSSatish Balay } 9066cf91177SBarry Smith #if defined(PETSC_USE_INFO) 907187ce0cbSSatish Balay if (k> max) max = k; 908187ce0cbSSatish Balay #endif 909596b8d2eSBarry Smith } 910596b8d2eSBarry Smith } 911596b8d2eSBarry Smith /* Loop Over B */ 9120bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 913596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9140bdbc534SSatish Balay row = i+rstart; 9150bdbc534SSatish Balay col = garray[bj[j]]; 916187ce0cbSSatish Balay key = row*Nbs + col + 1; 917c2760754SSatish Balay h1 = HASH(size,key,tmp); 9180bdbc534SSatish Balay for (k=0; k<size; k++){ 919958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9200bdbc534SSatish Balay HT[(h1+k)%size] = key; 9210bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 922596b8d2eSBarry Smith break; 9236cf91177SBarry Smith #if defined(PETSC_USE_INFO) 924187ce0cbSSatish Balay } else { 925187ce0cbSSatish Balay ct++; 926187ce0cbSSatish Balay #endif 927596b8d2eSBarry Smith } 928187ce0cbSSatish Balay } 9296cf91177SBarry Smith #if defined(PETSC_USE_INFO) 930187ce0cbSSatish Balay if (k> max) max = k; 931187ce0cbSSatish Balay #endif 932596b8d2eSBarry Smith } 933596b8d2eSBarry Smith } 934596b8d2eSBarry Smith 935596b8d2eSBarry Smith /* Print Summary */ 9366cf91177SBarry Smith #if defined(PETSC_USE_INFO) 937c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 938596b8d2eSBarry Smith if (HT[i]) {j++;} 939c38d4ed2SBarry Smith } 940ae15b995SBarry Smith ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 941187ce0cbSSatish Balay #endif 9423a40ed3dSBarry Smith PetscFunctionReturn(0); 943596b8d2eSBarry Smith } 94457b952d6SSatish Balay 9454a2ae208SSatish Balay #undef __FUNCT__ 9464a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 947dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 948bbb85fb3SSatish Balay { 949bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 950dfbe8321SBarry Smith PetscErrorCode ierr; 951b24ad042SBarry Smith PetscInt nstash,reallocs; 952bbb85fb3SSatish Balay InsertMode addv; 953bbb85fb3SSatish Balay 954bbb85fb3SSatish Balay PetscFunctionBegin; 955bbb85fb3SSatish Balay if (baij->donotstash) { 956bbb85fb3SSatish Balay PetscFunctionReturn(0); 957bbb85fb3SSatish Balay } 958bbb85fb3SSatish Balay 959bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 960bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 961bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 96229bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 963bbb85fb3SSatish Balay } 964bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 965bbb85fb3SSatish Balay 966899cda47SBarry Smith ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 967899cda47SBarry Smith ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 9688798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 969ae15b995SBarry Smith ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 97046680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 971ae15b995SBarry Smith ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 972bbb85fb3SSatish Balay PetscFunctionReturn(0); 973bbb85fb3SSatish Balay } 974bbb85fb3SSatish Balay 9754a2ae208SSatish Balay #undef __FUNCT__ 9764a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 977dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 978bbb85fb3SSatish Balay { 979bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 98091c97fd4SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 9816849ba73SBarry Smith PetscErrorCode ierr; 982b24ad042SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 983b24ad042SBarry Smith PetscInt *row,*col,other_disassembled; 9847c922b88SBarry Smith PetscTruth r1,r2,r3; 9853eda8832SBarry Smith MatScalar *val; 986bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 987b24ad042SBarry Smith PetscMPIInt n; 988bbb85fb3SSatish Balay 98991c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 990bbb85fb3SSatish Balay PetscFunctionBegin; 991bbb85fb3SSatish Balay if (!baij->donotstash) { 992a2d1c673SSatish Balay while (1) { 9938798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 994a2d1c673SSatish Balay if (!flg) break; 995a2d1c673SSatish Balay 996bbb85fb3SSatish Balay for (i=0; i<n;) { 997bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 998bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 999bbb85fb3SSatish Balay if (j < n) ncols = j-i; 1000bbb85fb3SSatish Balay else ncols = n-i; 1001bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 100293fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1003bbb85fb3SSatish Balay i = j; 1004bbb85fb3SSatish Balay } 1005bbb85fb3SSatish Balay } 10068798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1007a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1008a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1009a2d1c673SSatish Balay restore the original flags */ 1010a2d1c673SSatish Balay r1 = baij->roworiented; 1011a2d1c673SSatish Balay r2 = a->roworiented; 101291c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 10137c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10147c922b88SBarry Smith a->roworiented = PETSC_FALSE; 101591c97fd4SSatish Balay (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1016a2d1c673SSatish Balay while (1) { 10178798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1018a2d1c673SSatish Balay if (!flg) break; 1019a2d1c673SSatish Balay 1020a2d1c673SSatish Balay for (i=0; i<n;) { 1021a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1022a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1023a2d1c673SSatish Balay if (j < n) ncols = j-i; 1024a2d1c673SSatish Balay else ncols = n-i; 102593fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1026a2d1c673SSatish Balay i = j; 1027a2d1c673SSatish Balay } 1028a2d1c673SSatish Balay } 10298798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1030a2d1c673SSatish Balay baij->roworiented = r1; 1031a2d1c673SSatish Balay a->roworiented = r2; 103291c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1033bbb85fb3SSatish Balay } 1034bbb85fb3SSatish Balay 1035bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1036bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1037bbb85fb3SSatish Balay 1038bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1039bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1040bbb85fb3SSatish Balay /* 1041bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1042bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1043bbb85fb3SSatish Balay */ 1044bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1045bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1046bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1047bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1048bbb85fb3SSatish Balay } 1049bbb85fb3SSatish Balay } 1050bbb85fb3SSatish Balay 1051bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1052bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1053bbb85fb3SSatish Balay } 105491c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1055bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1056bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1057bbb85fb3SSatish Balay 10586cf91177SBarry Smith #if defined(PETSC_USE_INFO) 1059bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1060ae15b995SBarry Smith ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1061bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1062bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1063bbb85fb3SSatish Balay } 1064bbb85fb3SSatish Balay #endif 1065bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1066bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1067bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1068bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1069bbb85fb3SSatish Balay } 1070bbb85fb3SSatish Balay 1071606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1072606d414cSSatish Balay baij->rowvalues = 0; 1073bbb85fb3SSatish Balay PetscFunctionReturn(0); 1074bbb85fb3SSatish Balay } 107557b952d6SSatish Balay 10764a2ae208SSatish Balay #undef __FUNCT__ 10774a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 10786849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 107957b952d6SSatish Balay { 108057b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1081dfbe8321SBarry Smith PetscErrorCode ierr; 1082b24ad042SBarry Smith PetscMPIInt size = baij->size,rank = baij->rank; 1083899cda47SBarry Smith PetscInt bs = mat->rmap.bs; 108432077d6dSBarry Smith PetscTruth iascii,isdraw; 1085b0a32e0cSBarry Smith PetscViewer sviewer; 1086f3ef73ceSBarry Smith PetscViewerFormat format; 108757b952d6SSatish Balay 1088d64ed03dSBarry Smith PetscFunctionBegin; 108932077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1090fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 109132077d6dSBarry Smith if (iascii) { 1092b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1093456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 10944e220ebcSLois Curfman McInnes MatInfo info; 1095d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1096d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 109777431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1098899cda47SBarry Smith rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1099899cda47SBarry Smith mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1100d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 110177431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1102d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 110377431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1104b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 110507d81ca4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 110657b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 11073a40ed3dSBarry Smith PetscFunctionReturn(0); 1108fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 110977431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 11103a40ed3dSBarry Smith PetscFunctionReturn(0); 111104929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 111204929863SHong Zhang PetscFunctionReturn(0); 111357b952d6SSatish Balay } 111457b952d6SSatish Balay } 111557b952d6SSatish Balay 11160f5bd95cSBarry Smith if (isdraw) { 1117b0a32e0cSBarry Smith PetscDraw draw; 111857b952d6SSatish Balay PetscTruth isnull; 1119b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1120b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 112157b952d6SSatish Balay } 112257b952d6SSatish Balay 112357b952d6SSatish Balay if (size == 1) { 1124873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 112557b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1126d64ed03dSBarry Smith } else { 112757b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 112857b952d6SSatish Balay Mat A; 112957b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1130899cda47SBarry Smith PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11313eda8832SBarry Smith MatScalar *a; 113257b952d6SSatish Balay 1133f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1134f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 1135f69a0ea3SMatthew Knepley ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 113657b952d6SSatish Balay if (!rank) { 1137f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1138d64ed03dSBarry Smith } else { 1139f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 114057b952d6SSatish Balay } 1141f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1142899cda47SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 114352e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 114457b952d6SSatish Balay 114557b952d6SSatish Balay /* copy over the A part */ 114657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 114757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 114957b952d6SSatish Balay 115057b952d6SSatish Balay for (i=0; i<mbs; i++) { 1151899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 115257b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 115357b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1154899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 115557b952d6SSatish Balay for (k=0; k<bs; k++) { 115693fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157cee3aa6bSSatish Balay col++; a += bs; 115857b952d6SSatish Balay } 115957b952d6SSatish Balay } 116057b952d6SSatish Balay } 116157b952d6SSatish Balay /* copy over the B part */ 116257b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 116357b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 116457b952d6SSatish Balay for (i=0; i<mbs; i++) { 1165899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 116657b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 116757b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 116857b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 116957b952d6SSatish Balay for (k=0; k<bs; k++) { 117093fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1171cee3aa6bSSatish Balay col++; a += bs; 117257b952d6SSatish Balay } 117357b952d6SSatish Balay } 117457b952d6SSatish Balay } 1175606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11766d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11776d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 117855843e3eSBarry Smith /* 117955843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1180b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 118155843e3eSBarry Smith */ 1182b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183f14a1c24SBarry Smith if (!rank) { 1184e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11856831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 118657b952d6SSatish Balay } 1187b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 118857b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 118957b952d6SSatish Balay } 11903a40ed3dSBarry Smith PetscFunctionReturn(0); 119157b952d6SSatish Balay } 119257b952d6SSatish Balay 11934a2ae208SSatish Balay #undef __FUNCT__ 11944a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1195dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 119657b952d6SSatish Balay { 1197dfbe8321SBarry Smith PetscErrorCode ierr; 119832077d6dSBarry Smith PetscTruth iascii,isdraw,issocket,isbinary; 119957b952d6SSatish Balay 1200d64ed03dSBarry Smith PetscFunctionBegin; 120132077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1202fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1203b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1204fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 120532077d6dSBarry Smith if (iascii || isdraw || issocket || isbinary) { 12067b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 12075cd90555SBarry Smith } else { 120879a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 120957b952d6SSatish Balay } 12103a40ed3dSBarry Smith PetscFunctionReturn(0); 121157b952d6SSatish Balay } 121257b952d6SSatish Balay 12134a2ae208SSatish Balay #undef __FUNCT__ 12144a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1215dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 121679bdfe76SSatish Balay { 121779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1218dfbe8321SBarry Smith PetscErrorCode ierr; 121979bdfe76SSatish Balay 1220d64ed03dSBarry Smith PetscFunctionBegin; 1221aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1222899cda47SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 122379bdfe76SSatish Balay #endif 12248798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12258798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 122679bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 122779bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1228aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12299c666560SBarry Smith if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 123048e59246SSatish Balay #else 123105b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 123248e59246SSatish Balay #endif 123305b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1234606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1235606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 123605b42c5fSBarry Smith ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 123705b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 123805b42c5fSBarry Smith ierr = PetscFree(baij->hd);CHKERRQ(ierr); 12396fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 124005b42c5fSBarry Smith ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 12416fa18ffdSBarry Smith #endif 1242899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1243606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1244901853e0SKris Buschelman 1245dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1246901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1247901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1248901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1249901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1250aac34f13SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1251901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1252901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 12533a40ed3dSBarry Smith PetscFunctionReturn(0); 125479bdfe76SSatish Balay } 125579bdfe76SSatish Balay 12564a2ae208SSatish Balay #undef __FUNCT__ 12574a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1258dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1259cee3aa6bSSatish Balay { 1260cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1261dfbe8321SBarry Smith PetscErrorCode ierr; 1262b24ad042SBarry Smith PetscInt nt; 1263cee3aa6bSSatish Balay 1264d64ed03dSBarry Smith PetscFunctionBegin; 1265e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1266899cda47SBarry Smith if (nt != A->cmap.n) { 126729bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 126847b4a8eaSLois Curfman McInnes } 1269e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1270899cda47SBarry Smith if (nt != A->rmap.n) { 127129bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 127247b4a8eaSLois Curfman McInnes } 1273ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1275ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 12773a40ed3dSBarry Smith PetscFunctionReturn(0); 1278cee3aa6bSSatish Balay } 1279cee3aa6bSSatish Balay 12804a2ae208SSatish Balay #undef __FUNCT__ 12814a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1282dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1283cee3aa6bSSatish Balay { 1284cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1285dfbe8321SBarry Smith PetscErrorCode ierr; 1286d64ed03dSBarry Smith 1287d64ed03dSBarry Smith PetscFunctionBegin; 1288ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1289f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1290ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1291f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12923a40ed3dSBarry Smith PetscFunctionReturn(0); 1293cee3aa6bSSatish Balay } 1294cee3aa6bSSatish Balay 12954a2ae208SSatish Balay #undef __FUNCT__ 12964a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1297dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1298cee3aa6bSSatish Balay { 1299cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1300dfbe8321SBarry Smith PetscErrorCode ierr; 1301a5ff213dSBarry Smith PetscTruth merged; 1302cee3aa6bSSatish Balay 1303d64ed03dSBarry Smith PetscFunctionBegin; 1304a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1305cee3aa6bSSatish Balay /* do nondiagonal part */ 13067c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1307a5ff213dSBarry Smith if (!merged) { 1308cee3aa6bSSatish Balay /* send it on its way */ 1309ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1310cee3aa6bSSatish Balay /* do local part */ 13117c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1312cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1313a5ff213dSBarry Smith /* inserted in yy until the next line */ 1314ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315a5ff213dSBarry Smith } else { 1316a5ff213dSBarry Smith /* do local part */ 1317a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1318a5ff213dSBarry Smith /* send it on its way */ 1319ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1320a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 1321ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1322a5ff213dSBarry Smith } 13233a40ed3dSBarry Smith PetscFunctionReturn(0); 1324cee3aa6bSSatish Balay } 1325cee3aa6bSSatish Balay 13264a2ae208SSatish Balay #undef __FUNCT__ 13274a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1328dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1329cee3aa6bSSatish Balay { 1330cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331dfbe8321SBarry Smith PetscErrorCode ierr; 1332cee3aa6bSSatish Balay 1333d64ed03dSBarry Smith PetscFunctionBegin; 1334cee3aa6bSSatish Balay /* do nondiagonal part */ 13357c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1336cee3aa6bSSatish Balay /* send it on its way */ 1337ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1338cee3aa6bSSatish Balay /* do local part */ 13397c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1340cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1341cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1342cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1343ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 13443a40ed3dSBarry Smith PetscFunctionReturn(0); 1345cee3aa6bSSatish Balay } 1346cee3aa6bSSatish Balay 1347cee3aa6bSSatish Balay /* 1348cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1349cee3aa6bSSatish Balay diagonal block 1350cee3aa6bSSatish Balay */ 13514a2ae208SSatish Balay #undef __FUNCT__ 13524a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1353dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1354cee3aa6bSSatish Balay { 1355cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1356dfbe8321SBarry Smith PetscErrorCode ierr; 1357d64ed03dSBarry Smith 1358d64ed03dSBarry Smith PetscFunctionBegin; 1359899cda47SBarry Smith if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13603a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13613a40ed3dSBarry Smith PetscFunctionReturn(0); 1362cee3aa6bSSatish Balay } 1363cee3aa6bSSatish Balay 13644a2ae208SSatish Balay #undef __FUNCT__ 13654a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1366f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1367cee3aa6bSSatish Balay { 1368cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1369dfbe8321SBarry Smith PetscErrorCode ierr; 1370d64ed03dSBarry Smith 1371d64ed03dSBarry Smith PetscFunctionBegin; 1372f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1373f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 13743a40ed3dSBarry Smith PetscFunctionReturn(0); 1375cee3aa6bSSatish Balay } 1376026e39d0SSatish Balay 13774a2ae208SSatish Balay #undef __FUNCT__ 13784a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1379b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1380acdf5bf4SSatish Balay { 1381acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 138287828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13836849ba73SBarry Smith PetscErrorCode ierr; 1384899cda47SBarry Smith PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1385899cda47SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1386899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1387acdf5bf4SSatish Balay 1388d64ed03dSBarry Smith PetscFunctionBegin; 1389abc0a331SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1390acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1391acdf5bf4SSatish Balay 1392acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1393acdf5bf4SSatish Balay /* 1394acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1395acdf5bf4SSatish Balay */ 1396acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1397b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1398bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1399acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1400acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1401acdf5bf4SSatish Balay } 1402b24ad042SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1403b24ad042SBarry Smith mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1404acdf5bf4SSatish Balay } 1405acdf5bf4SSatish Balay 140629bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1407d9d09a02SSatish Balay lrow = row - brstart; 1408acdf5bf4SSatish Balay 1409acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1410acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1411acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1412f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1413f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1414acdf5bf4SSatish Balay nztot = nzA + nzB; 1415acdf5bf4SSatish Balay 1416acdf5bf4SSatish Balay cmap = mat->garray; 1417acdf5bf4SSatish Balay if (v || idx) { 1418acdf5bf4SSatish Balay if (nztot) { 1419acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1420b24ad042SBarry Smith PetscInt imark = -1; 1421acdf5bf4SSatish Balay if (v) { 1422acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1423acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1424d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1425acdf5bf4SSatish Balay else break; 1426acdf5bf4SSatish Balay } 1427acdf5bf4SSatish Balay imark = i; 1428acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1429acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1430acdf5bf4SSatish Balay } 1431acdf5bf4SSatish Balay if (idx) { 1432acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1433acdf5bf4SSatish Balay if (imark > -1) { 1434acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1435bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1436acdf5bf4SSatish Balay } 1437acdf5bf4SSatish Balay } else { 1438acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1439d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1440d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1441acdf5bf4SSatish Balay else break; 1442acdf5bf4SSatish Balay } 1443acdf5bf4SSatish Balay imark = i; 1444acdf5bf4SSatish Balay } 1445d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1446d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1447acdf5bf4SSatish Balay } 1448d64ed03dSBarry Smith } else { 1449d212a18eSSatish Balay if (idx) *idx = 0; 1450d212a18eSSatish Balay if (v) *v = 0; 1451d212a18eSSatish Balay } 1452acdf5bf4SSatish Balay } 1453acdf5bf4SSatish Balay *nz = nztot; 1454f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1455f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14563a40ed3dSBarry Smith PetscFunctionReturn(0); 1457acdf5bf4SSatish Balay } 1458acdf5bf4SSatish Balay 14594a2ae208SSatish Balay #undef __FUNCT__ 14604a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1461b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462acdf5bf4SSatish Balay { 1463acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1464d64ed03dSBarry Smith 1465d64ed03dSBarry Smith PetscFunctionBegin; 1466abc0a331SBarry Smith if (!baij->getrowactive) { 146729bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1468acdf5bf4SSatish Balay } 1469acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14703a40ed3dSBarry Smith PetscFunctionReturn(0); 1471acdf5bf4SSatish Balay } 1472acdf5bf4SSatish Balay 14734a2ae208SSatish Balay #undef __FUNCT__ 14744a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1475dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 147658667388SSatish Balay { 147758667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1478dfbe8321SBarry Smith PetscErrorCode ierr; 1479d64ed03dSBarry Smith 1480d64ed03dSBarry Smith PetscFunctionBegin; 148158667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 148258667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14833a40ed3dSBarry Smith PetscFunctionReturn(0); 148458667388SSatish Balay } 14850ac07820SSatish Balay 14864a2ae208SSatish Balay #undef __FUNCT__ 14874a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1488dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14890ac07820SSatish Balay { 14904e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14914e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1492dfbe8321SBarry Smith PetscErrorCode ierr; 1493329f5518SBarry Smith PetscReal isend[5],irecv[5]; 14940ac07820SSatish Balay 1495d64ed03dSBarry Smith PetscFunctionBegin; 1496899cda47SBarry Smith info->block_size = (PetscReal)matin->rmap.bs; 14974e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 14980e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1499de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 15004e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 15010e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1502de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 15030ac07820SSatish Balay if (flag == MAT_LOCAL) { 15044e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15054e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 15064e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15074e220ebcSLois Curfman McInnes info->memory = isend[3]; 15084e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15090ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1510d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 15114e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15124e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15134e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15144e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15154e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15160ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1517d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 15184e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15194e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15204e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15214e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15224e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1523d41123aaSBarry Smith } else { 152477431f27SBarry Smith SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 15250ac07820SSatish Balay } 1526899cda47SBarry Smith info->rows_global = (PetscReal)A->rmap.N; 1527899cda47SBarry Smith info->columns_global = (PetscReal)A->cmap.N; 1528899cda47SBarry Smith info->rows_local = (PetscReal)A->rmap.N; 1529899cda47SBarry Smith info->columns_local = (PetscReal)A->cmap.N; 15304e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15314e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15324e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15333a40ed3dSBarry Smith PetscFunctionReturn(0); 15340ac07820SSatish Balay } 15350ac07820SSatish Balay 15364a2ae208SSatish Balay #undef __FUNCT__ 15374a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1538*4e0d8c25SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 153958667388SSatish Balay { 154058667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1541dfbe8321SBarry Smith PetscErrorCode ierr; 154258667388SSatish Balay 1543d64ed03dSBarry Smith PetscFunctionBegin; 154412c028f9SKris Buschelman switch (op) { 154512c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 154612c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 154712c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 154812c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 154912c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 1550*4e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1551*4e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 155212c028f9SKris Buschelman break; 155312c028f9SKris Buschelman case MAT_ROW_ORIENTED: 1554*4e0d8c25SBarry Smith a->roworiented = flg; 1555*4e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1556*4e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 155712c028f9SKris Buschelman break; 155812c028f9SKris Buschelman case MAT_ROWS_SORTED: 1559*4e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1560290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 156112c028f9SKris Buschelman break; 156212c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1563*4e0d8c25SBarry Smith a->donotstash = flg; 156412c028f9SKris Buschelman break; 156512c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 1566*4e0d8c25SBarry Smith a->ht_flag = flg; 156712c028f9SKris Buschelman break; 156877e54ba9SKris Buschelman case MAT_SYMMETRIC: 156977e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15702188ac68SBarry Smith case MAT_HERMITIAN: 15712188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 1572*4e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 157377e54ba9SKris Buschelman break; 157412c028f9SKris Buschelman default: 1575ad86a440SBarry Smith SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1576d64ed03dSBarry Smith } 15773a40ed3dSBarry Smith PetscFunctionReturn(0); 157858667388SSatish Balay } 157958667388SSatish Balay 15804a2ae208SSatish Balay #undef __FUNCT__ 15814a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1582dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 15830ac07820SSatish Balay { 15840ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15850ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15860ac07820SSatish Balay Mat B; 1587dfbe8321SBarry Smith PetscErrorCode ierr; 1588899cda47SBarry Smith PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1589899cda47SBarry Smith PetscInt bs=A->rmap.bs,mbs=baij->mbs; 15903eda8832SBarry Smith MatScalar *a; 15910ac07820SSatish Balay 1592d64ed03dSBarry Smith PetscFunctionBegin; 159329bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1594f69a0ea3SMatthew Knepley ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1595899cda47SBarry Smith ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1596f204ca49SKris Buschelman ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1597899cda47SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 15980ac07820SSatish Balay 15990ac07820SSatish Balay /* copy over the A part */ 16000ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16010ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1602b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 16030ac07820SSatish Balay 16040ac07820SSatish Balay for (i=0; i<mbs; i++) { 1605899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 16060ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16070ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1608899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 16090ac07820SSatish Balay for (k=0; k<bs; k++) { 161093fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16110ac07820SSatish Balay col++; a += bs; 16120ac07820SSatish Balay } 16130ac07820SSatish Balay } 16140ac07820SSatish Balay } 16150ac07820SSatish Balay /* copy over the B part */ 16160ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16170ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16180ac07820SSatish Balay for (i=0; i<mbs; i++) { 1619899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 16200ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16210ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16220ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16230ac07820SSatish Balay for (k=0; k<bs; k++) { 162493fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16250ac07820SSatish Balay col++; a += bs; 16260ac07820SSatish Balay } 16270ac07820SSatish Balay } 16280ac07820SSatish Balay } 1629606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16300ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16310ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16320ac07820SSatish Balay 16337c922b88SBarry Smith if (matout) { 16340ac07820SSatish Balay *matout = B; 16350ac07820SSatish Balay } else { 1636273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16370ac07820SSatish Balay } 16383a40ed3dSBarry Smith PetscFunctionReturn(0); 16390ac07820SSatish Balay } 16400e95ebc0SSatish Balay 16414a2ae208SSatish Balay #undef __FUNCT__ 16424a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1643dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16440e95ebc0SSatish Balay { 164536c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 164636c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1647dfbe8321SBarry Smith PetscErrorCode ierr; 1648b24ad042SBarry Smith PetscInt s1,s2,s3; 16490e95ebc0SSatish Balay 1650d64ed03dSBarry Smith PetscFunctionBegin; 165136c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 165236c4a09eSSatish Balay if (rr) { 165336c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 165429bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 165536c4a09eSSatish Balay /* Overlap communication with computation. */ 1656ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 165736c4a09eSSatish Balay } 16580e95ebc0SSatish Balay if (ll) { 16590e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 166029bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1661a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16620e95ebc0SSatish Balay } 166336c4a09eSSatish Balay /* scale the diagonal block */ 166436c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 166536c4a09eSSatish Balay 166636c4a09eSSatish Balay if (rr) { 166736c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1668ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 167036c4a09eSSatish Balay } 167136c4a09eSSatish Balay 16723a40ed3dSBarry Smith PetscFunctionReturn(0); 16730e95ebc0SSatish Balay } 16740e95ebc0SSatish Balay 16754a2ae208SSatish Balay #undef __FUNCT__ 16764a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1677f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 16780ac07820SSatish Balay { 16790ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16806849ba73SBarry Smith PetscErrorCode ierr; 1681b24ad042SBarry Smith PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1682899cda47SBarry Smith PetscInt i,*owners = A->rmap.range; 1683b24ad042SBarry Smith PetscInt *nprocs,j,idx,nsends,row; 1684b24ad042SBarry Smith PetscInt nmax,*svalues,*starts,*owner,nrecvs; 16856543fbbaSBarry Smith PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1686357c27ecSBarry Smith PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 16870ac07820SSatish Balay MPI_Comm comm = A->comm; 16880ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 16890ac07820SSatish Balay MPI_Status recv_status,*send_status; 16906543fbbaSBarry Smith #if defined(PETSC_DEBUG) 16916543fbbaSBarry Smith PetscTruth found = PETSC_FALSE; 16926543fbbaSBarry Smith #endif 16930ac07820SSatish Balay 1694d64ed03dSBarry Smith PetscFunctionBegin; 16950ac07820SSatish Balay /* first count number of contributors to each processor */ 1696b24ad042SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1697b24ad042SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1698b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 16996543fbbaSBarry Smith j = 0; 17000ac07820SSatish Balay for (i=0; i<N; i++) { 17016543fbbaSBarry Smith if (lastidx > (idx = rows[i])) j = 0; 17026543fbbaSBarry Smith lastidx = idx; 17036543fbbaSBarry Smith for (; j<size; j++) { 1704357c27ecSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 17056543fbbaSBarry Smith nprocs[2*j]++; 17066543fbbaSBarry Smith nprocs[2*j+1] = 1; 17076543fbbaSBarry Smith owner[i] = j; 17086543fbbaSBarry Smith #if defined(PETSC_DEBUG) 17096543fbbaSBarry Smith found = PETSC_TRUE; 17106543fbbaSBarry Smith #endif 17116543fbbaSBarry Smith break; 17120ac07820SSatish Balay } 17130ac07820SSatish Balay } 17146543fbbaSBarry Smith #if defined(PETSC_DEBUG) 171529bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 17166543fbbaSBarry Smith found = PETSC_FALSE; 17176543fbbaSBarry Smith #endif 17180ac07820SSatish Balay } 1719c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 17200ac07820SSatish Balay 17210ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1722c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 17230ac07820SSatish Balay 17240ac07820SSatish Balay /* post receives: */ 1725b24ad042SBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1726b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17270ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1728b24ad042SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17290ac07820SSatish Balay } 17300ac07820SSatish Balay 17310ac07820SSatish Balay /* do sends: 17320ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17330ac07820SSatish Balay the ith processor 17340ac07820SSatish Balay */ 1735b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1736b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1737b24ad042SBarry Smith ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 17380ac07820SSatish Balay starts[0] = 0; 1739c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17400ac07820SSatish Balay for (i=0; i<N; i++) { 17410ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17420ac07820SSatish Balay } 17430ac07820SSatish Balay 17440ac07820SSatish Balay starts[0] = 0; 1745c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17460ac07820SSatish Balay count = 0; 17470ac07820SSatish Balay for (i=0; i<size; i++) { 1748c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1749b24ad042SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17500ac07820SSatish Balay } 17510ac07820SSatish Balay } 1752606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17530ac07820SSatish Balay 1754357c27ecSBarry Smith base = owners[rank]; 17550ac07820SSatish Balay 17560ac07820SSatish Balay /* wait on receives */ 1757b24ad042SBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 17580ac07820SSatish Balay source = lens + nrecvs; 17590ac07820SSatish Balay count = nrecvs; slen = 0; 17600ac07820SSatish Balay while (count) { 1761ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17620ac07820SSatish Balay /* unpack receives into our local space */ 1763b24ad042SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 17640ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17650ac07820SSatish Balay lens[imdex] = n; 17660ac07820SSatish Balay slen += n; 17670ac07820SSatish Balay count--; 17680ac07820SSatish Balay } 1769606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17700ac07820SSatish Balay 17710ac07820SSatish Balay /* move the data into the send scatter */ 1772b24ad042SBarry Smith ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 17730ac07820SSatish Balay count = 0; 17740ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17750ac07820SSatish Balay values = rvalues + i*nmax; 17760ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17770ac07820SSatish Balay lrows[count++] = values[j] - base; 17780ac07820SSatish Balay } 17790ac07820SSatish Balay } 1780606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1781606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1782606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1783606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17840ac07820SSatish Balay 17850ac07820SSatish Balay /* actually zap the local rows */ 178672dacd9aSBarry Smith /* 178772dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 1788a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 178972dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 179072dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 179172dacd9aSBarry Smith 1792f4df32b1SMatthew Knepley Contributed by: Matthew Knepley 179372dacd9aSBarry Smith */ 17949c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1795f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1796899cda47SBarry Smith if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1797f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1798f4df32b1SMatthew Knepley } else if (diag != 0.0) { 1799f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1800fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 180129bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1802fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18036525c446SSatish Balay } 1804a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1805a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 1806f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1807a07cd24cSSatish Balay } 1808a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1809a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18109c957beeSSatish Balay } else { 1811f4df32b1SMatthew Knepley ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1812a07cd24cSSatish Balay } 18139c957beeSSatish Balay 1814606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1815a07cd24cSSatish Balay 18160ac07820SSatish Balay /* wait on sends */ 18170ac07820SSatish Balay if (nsends) { 181882502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1819ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1820606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18210ac07820SSatish Balay } 1822606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1823606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18240ac07820SSatish Balay 18253a40ed3dSBarry Smith PetscFunctionReturn(0); 18260ac07820SSatish Balay } 182772dacd9aSBarry Smith 18284a2ae208SSatish Balay #undef __FUNCT__ 18294a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1830dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1831bb5a7306SBarry Smith { 1832bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1833dfbe8321SBarry Smith PetscErrorCode ierr; 1834d64ed03dSBarry Smith 1835d64ed03dSBarry Smith PetscFunctionBegin; 1836bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18373a40ed3dSBarry Smith PetscFunctionReturn(0); 1838bb5a7306SBarry Smith } 1839bb5a7306SBarry Smith 18406849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18410ac07820SSatish Balay 18424a2ae208SSatish Balay #undef __FUNCT__ 18434a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1844dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18457fc3c18eSBarry Smith { 18467fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18477fc3c18eSBarry Smith Mat a,b,c,d; 18487fc3c18eSBarry Smith PetscTruth flg; 1849dfbe8321SBarry Smith PetscErrorCode ierr; 18507fc3c18eSBarry Smith 18517fc3c18eSBarry Smith PetscFunctionBegin; 18527fc3c18eSBarry Smith a = matA->A; b = matA->B; 18537fc3c18eSBarry Smith c = matB->A; d = matB->B; 18547fc3c18eSBarry Smith 18557fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1856abc0a331SBarry Smith if (flg) { 18577fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18587fc3c18eSBarry Smith } 18597fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18607fc3c18eSBarry Smith PetscFunctionReturn(0); 18617fc3c18eSBarry Smith } 18627fc3c18eSBarry Smith 18633c896bc6SHong Zhang #undef __FUNCT__ 18643c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ" 18653c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 18663c896bc6SHong Zhang { 18673c896bc6SHong Zhang PetscErrorCode ierr; 18683c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 18693c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 18703c896bc6SHong Zhang 18713c896bc6SHong Zhang PetscFunctionBegin; 18723c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 18733c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 18743c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 18753c896bc6SHong Zhang } else { 18763c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 18773c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 18783c896bc6SHong Zhang } 18793c896bc6SHong Zhang PetscFunctionReturn(0); 18803c896bc6SHong Zhang } 1881273d9f13SBarry Smith 18824a2ae208SSatish Balay #undef __FUNCT__ 18834a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1884dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1885273d9f13SBarry Smith { 1886dfbe8321SBarry Smith PetscErrorCode ierr; 1887273d9f13SBarry Smith 1888273d9f13SBarry Smith PetscFunctionBegin; 18897edd0491SSatish Balay ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1890273d9f13SBarry Smith PetscFunctionReturn(0); 1891273d9f13SBarry Smith } 1892273d9f13SBarry Smith 18934fe895cdSHong Zhang #include "petscblaslapack.h" 18944fe895cdSHong Zhang #undef __FUNCT__ 18954fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ" 18964fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 18974fe895cdSHong Zhang { 18984fe895cdSHong Zhang PetscErrorCode ierr; 18994fe895cdSHong Zhang Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 19004fe895cdSHong Zhang PetscBLASInt bnz,one=1; 19014fe895cdSHong Zhang Mat_SeqBAIJ *x,*y; 19024fe895cdSHong Zhang 19034fe895cdSHong Zhang PetscFunctionBegin; 19044fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 19054fe895cdSHong Zhang PetscScalar alpha = a; 19064fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->A->data; 19074fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->A->data; 19084fe895cdSHong Zhang bnz = (PetscBLASInt)x->nz; 19094fe895cdSHong Zhang BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 19104fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->B->data; 19114fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->B->data; 19124fe895cdSHong Zhang bnz = (PetscBLASInt)x->nz; 19134fe895cdSHong Zhang BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 19144fe895cdSHong Zhang } else { 19154fe895cdSHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 19164fe895cdSHong Zhang } 19174fe895cdSHong Zhang PetscFunctionReturn(0); 19184fe895cdSHong Zhang } 19194fe895cdSHong Zhang 192099cafbc1SBarry Smith #undef __FUNCT__ 192199cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ" 192299cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 192399cafbc1SBarry Smith { 192499cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 192599cafbc1SBarry Smith PetscErrorCode ierr; 192699cafbc1SBarry Smith 192799cafbc1SBarry Smith PetscFunctionBegin; 192899cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 192999cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 193099cafbc1SBarry Smith PetscFunctionReturn(0); 193199cafbc1SBarry Smith } 193299cafbc1SBarry Smith 193399cafbc1SBarry Smith #undef __FUNCT__ 193499cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 193599cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 193699cafbc1SBarry Smith { 193799cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 193899cafbc1SBarry Smith PetscErrorCode ierr; 193999cafbc1SBarry Smith 194099cafbc1SBarry Smith PetscFunctionBegin; 194199cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 194299cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 194399cafbc1SBarry Smith PetscFunctionReturn(0); 194499cafbc1SBarry Smith } 194599cafbc1SBarry Smith 194679bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1947cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1948cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1949cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1950cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1951cc2dc46cSBarry Smith MatMult_MPIBAIJ, 195297304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 19537c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19547c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1955cc2dc46cSBarry Smith 0, 1956cc2dc46cSBarry Smith 0, 1957cc2dc46cSBarry Smith 0, 195897304618SKris Buschelman /*10*/ 0, 1959cc2dc46cSBarry Smith 0, 1960cc2dc46cSBarry Smith 0, 1961cc2dc46cSBarry Smith 0, 1962cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 196397304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 19647fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1965cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1966cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1967cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 196897304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 1969cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1970cc2dc46cSBarry Smith 0, 1971cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1972cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 197397304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ, 1974cc2dc46cSBarry Smith 0, 1975cc2dc46cSBarry Smith 0, 1976cc2dc46cSBarry Smith 0, 1977cc2dc46cSBarry Smith 0, 197897304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ, 1979273d9f13SBarry Smith 0, 1980cc2dc46cSBarry Smith 0, 1981cc2dc46cSBarry Smith 0, 1982cc2dc46cSBarry Smith 0, 198397304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ, 1984cc2dc46cSBarry Smith 0, 1985cc2dc46cSBarry Smith 0, 1986cc2dc46cSBarry Smith 0, 1987cc2dc46cSBarry Smith 0, 19884fe895cdSHong Zhang /*40*/ MatAXPY_MPIBAIJ, 1989cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1990cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1991cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 19923c896bc6SHong Zhang MatCopy_MPIBAIJ, 19938c07d4e3SBarry Smith /*45*/ 0, 1994cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1995cc2dc46cSBarry Smith 0, 1996cc2dc46cSBarry Smith 0, 1997cc2dc46cSBarry Smith 0, 1998521d7252SBarry Smith /*50*/ 0, 1999cc2dc46cSBarry Smith 0, 2000cc2dc46cSBarry Smith 0, 2001cc2dc46cSBarry Smith 0, 2002cc2dc46cSBarry Smith 0, 200397304618SKris Buschelman /*55*/ 0, 2004cc2dc46cSBarry Smith 0, 2005cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 2006cc2dc46cSBarry Smith 0, 2007cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 200897304618SKris Buschelman /*60*/ 0, 2009f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2010f14a1c24SBarry Smith MatView_MPIBAIJ, 2011357abbc8SBarry Smith 0, 20127843d17aSBarry Smith 0, 201397304618SKris Buschelman /*65*/ 0, 20147843d17aSBarry Smith 0, 20157843d17aSBarry Smith 0, 20167843d17aSBarry Smith 0, 20177843d17aSBarry Smith 0, 2018985db425SBarry Smith /*70*/ MatGetRowMaxAbs_MPIBAIJ, 20197843d17aSBarry Smith 0, 202097304618SKris Buschelman 0, 202197304618SKris Buschelman 0, 202297304618SKris Buschelman 0, 202397304618SKris Buschelman /*75*/ 0, 202497304618SKris Buschelman 0, 202597304618SKris Buschelman 0, 202697304618SKris Buschelman 0, 202797304618SKris Buschelman 0, 202897304618SKris Buschelman /*80*/ 0, 202997304618SKris Buschelman 0, 203097304618SKris Buschelman 0, 203197304618SKris Buschelman 0, 2032865e5f61SKris Buschelman MatLoad_MPIBAIJ, 2033865e5f61SKris Buschelman /*85*/ 0, 2034865e5f61SKris Buschelman 0, 2035865e5f61SKris Buschelman 0, 2036865e5f61SKris Buschelman 0, 2037865e5f61SKris Buschelman 0, 2038865e5f61SKris Buschelman /*90*/ 0, 2039865e5f61SKris Buschelman 0, 2040865e5f61SKris Buschelman 0, 2041865e5f61SKris Buschelman 0, 2042865e5f61SKris Buschelman 0, 2043865e5f61SKris Buschelman /*95*/ 0, 2044865e5f61SKris Buschelman 0, 2045865e5f61SKris Buschelman 0, 204699cafbc1SBarry Smith 0, 204799cafbc1SBarry Smith 0, 204899cafbc1SBarry Smith /*100*/0, 204999cafbc1SBarry Smith 0, 205099cafbc1SBarry Smith 0, 205199cafbc1SBarry Smith 0, 205299cafbc1SBarry Smith 0, 205399cafbc1SBarry Smith /*105*/0, 205499cafbc1SBarry Smith MatRealPart_MPIBAIJ, 205599cafbc1SBarry Smith MatImaginaryPart_MPIBAIJ}; 205679bdfe76SSatish Balay 20575ef9f2a5SBarry Smith 2058e18c124aSSatish Balay EXTERN_C_BEGIN 20594a2ae208SSatish Balay #undef __FUNCT__ 20604a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2061be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 20625ef9f2a5SBarry Smith { 20635ef9f2a5SBarry Smith PetscFunctionBegin; 20645ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 20655ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 20665ef9f2a5SBarry Smith PetscFunctionReturn(0); 20675ef9f2a5SBarry Smith } 2068e18c124aSSatish Balay EXTERN_C_END 206979bdfe76SSatish Balay 2070273d9f13SBarry Smith EXTERN_C_BEGIN 2071f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2072d94109b8SHong Zhang EXTERN_C_END 2073d94109b8SHong Zhang 2074aac34f13SBarry Smith #undef __FUNCT__ 2075aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2076b7940d39SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2077aac34f13SBarry Smith { 2078899cda47SBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2079899cda47SBarry Smith PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2080899cda47SBarry Smith PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2081aac34f13SBarry Smith const PetscInt *JJ; 2082aac34f13SBarry Smith PetscScalar *values; 2083aac34f13SBarry Smith PetscErrorCode ierr; 2084aac34f13SBarry Smith 2085aac34f13SBarry Smith PetscFunctionBegin; 2086aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2087b7940d39SSatish Balay if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2088aac34f13SBarry Smith #endif 2089aac34f13SBarry Smith ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2090aac34f13SBarry Smith o_nnz = d_nnz + m; 2091aac34f13SBarry Smith 2092aac34f13SBarry Smith for (i=0; i<m; i++) { 2093b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 2094b7940d39SSatish Balay JJ = J + Ii[i]; 2095aac34f13SBarry Smith nnz_max = PetscMax(nnz_max,nnz); 2096aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2097aac34f13SBarry Smith if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2098aac34f13SBarry Smith #endif 2099aac34f13SBarry Smith for (j=0; j<nnz; j++) { 2100aac34f13SBarry Smith if (*JJ >= cstart) break; 2101aac34f13SBarry Smith JJ++; 2102aac34f13SBarry Smith } 2103aac34f13SBarry Smith d = 0; 2104aac34f13SBarry Smith for (; j<nnz; j++) { 2105aac34f13SBarry Smith if (*JJ++ >= cend) break; 2106aac34f13SBarry Smith d++; 2107aac34f13SBarry Smith } 2108aac34f13SBarry Smith d_nnz[i] = d; 2109aac34f13SBarry Smith o_nnz[i] = nnz - d; 2110aac34f13SBarry Smith } 2111aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2112aac34f13SBarry Smith ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2113aac34f13SBarry Smith 2114aac34f13SBarry Smith if (v) values = (PetscScalar*)v; 2115aac34f13SBarry Smith else { 2116aac34f13SBarry Smith ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2117aac34f13SBarry Smith ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2118aac34f13SBarry Smith } 2119aac34f13SBarry Smith 2120*4e0d8c25SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr); 2121aac34f13SBarry Smith for (i=0; i<m; i++) { 2122aac34f13SBarry Smith ii = i + rstart; 2123b7940d39SSatish Balay nnz = Ii[i+1]- Ii[i]; 2124b7940d39SSatish Balay ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2125aac34f13SBarry Smith } 2126aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2127aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2128*4e0d8c25SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_FALSE);CHKERRQ(ierr); 2129aac34f13SBarry Smith 2130aac34f13SBarry Smith if (!v) { 2131aac34f13SBarry Smith ierr = PetscFree(values);CHKERRQ(ierr); 2132aac34f13SBarry Smith } 2133aac34f13SBarry Smith PetscFunctionReturn(0); 2134aac34f13SBarry Smith } 2135aac34f13SBarry Smith 2136aac34f13SBarry Smith #undef __FUNCT__ 2137aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2138aac34f13SBarry Smith /*@C 2139aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2140aac34f13SBarry Smith (the default parallel PETSc format). 2141aac34f13SBarry Smith 2142aac34f13SBarry Smith Collective on MPI_Comm 2143aac34f13SBarry Smith 2144aac34f13SBarry Smith Input Parameters: 2145aac34f13SBarry Smith + A - the matrix 2146aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2147aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2148aac34f13SBarry Smith - v - optional values in the matrix 2149aac34f13SBarry Smith 2150aac34f13SBarry Smith Level: developer 2151aac34f13SBarry Smith 2152aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 2153aac34f13SBarry Smith 2154aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2155aac34f13SBarry Smith @*/ 2156be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2157aac34f13SBarry Smith { 2158aac34f13SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2159aac34f13SBarry Smith 2160aac34f13SBarry Smith PetscFunctionBegin; 2161aac34f13SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2162aac34f13SBarry Smith if (f) { 2163aac34f13SBarry Smith ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2164aac34f13SBarry Smith } 2165aac34f13SBarry Smith PetscFunctionReturn(0); 2166aac34f13SBarry Smith } 2167aac34f13SBarry Smith 2168d94109b8SHong Zhang EXTERN_C_BEGIN 21694a2ae208SSatish Balay #undef __FUNCT__ 2170a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2171be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2172a23d5eceSKris Buschelman { 2173a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2174dfbe8321SBarry Smith PetscErrorCode ierr; 2175b24ad042SBarry Smith PetscInt i; 2176a23d5eceSKris Buschelman 2177a23d5eceSKris Buschelman PetscFunctionBegin; 2178a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 21798c07d4e3SBarry Smith ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 21808c07d4e3SBarry Smith ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 21818c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2182a23d5eceSKris Buschelman 2183a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2184a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2185a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 218677431f27SBarry Smith if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 218777431f27SBarry Smith if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2188899cda47SBarry Smith 2189899cda47SBarry Smith B->rmap.bs = bs; 2190899cda47SBarry Smith B->cmap.bs = bs; 21916148ca0dSBarry Smith ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 21926148ca0dSBarry Smith ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2193899cda47SBarry Smith 2194a23d5eceSKris Buschelman if (d_nnz) { 2195899cda47SBarry Smith for (i=0; i<B->rmap.n/bs; i++) { 219677431f27SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2197a23d5eceSKris Buschelman } 2198a23d5eceSKris Buschelman } 2199a23d5eceSKris Buschelman if (o_nnz) { 2200899cda47SBarry Smith for (i=0; i<B->rmap.n/bs; i++) { 220177431f27SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2202a23d5eceSKris Buschelman } 2203a23d5eceSKris Buschelman } 2204a23d5eceSKris Buschelman 2205a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2206a23d5eceSKris Buschelman b->bs2 = bs*bs; 2207899cda47SBarry Smith b->mbs = B->rmap.n/bs; 2208899cda47SBarry Smith b->nbs = B->cmap.n/bs; 2209899cda47SBarry Smith b->Mbs = B->rmap.N/bs; 2210899cda47SBarry Smith b->Nbs = B->cmap.N/bs; 2211a23d5eceSKris Buschelman 2212a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2213899cda47SBarry Smith b->rangebs[i] = B->rmap.range[i]/bs; 2214a23d5eceSKris Buschelman } 2215899cda47SBarry Smith b->rstartbs = B->rmap.rstart/bs; 2216899cda47SBarry Smith b->rendbs = B->rmap.rend/bs; 2217899cda47SBarry Smith b->cstartbs = B->cmap.rstart/bs; 2218899cda47SBarry Smith b->cendbs = B->cmap.rend/bs; 2219a23d5eceSKris Buschelman 2220f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2221899cda47SBarry Smith ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 22229c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2223c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 222452e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2225f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2226899cda47SBarry Smith ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 22279c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2228c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 222952e6d16bSBarry Smith ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2230c60e587dSKris Buschelman 2231a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2232a23d5eceSKris Buschelman 2233a23d5eceSKris Buschelman PetscFunctionReturn(0); 2234a23d5eceSKris Buschelman } 2235a23d5eceSKris Buschelman EXTERN_C_END 2236a23d5eceSKris Buschelman 2237a23d5eceSKris Buschelman EXTERN_C_BEGIN 2238be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2239be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 224092b32695SKris Buschelman EXTERN_C_END 22415bf65638SKris Buschelman 22420bad9183SKris Buschelman /*MC 2243fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 22440bad9183SKris Buschelman 22450bad9183SKris Buschelman Options Database Keys: 22468c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 22478c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix 22488c07d4e3SBarry Smith - -mat_use_hash_table <fact> 22490bad9183SKris Buschelman 22500bad9183SKris Buschelman Level: beginner 22510bad9183SKris Buschelman 22520bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 22530bad9183SKris Buschelman M*/ 22540bad9183SKris Buschelman 225592b32695SKris Buschelman EXTERN_C_BEGIN 2256a23d5eceSKris Buschelman #undef __FUNCT__ 22574a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2258be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2259273d9f13SBarry Smith { 2260273d9f13SBarry Smith Mat_MPIBAIJ *b; 2261dfbe8321SBarry Smith PetscErrorCode ierr; 2262273d9f13SBarry Smith PetscTruth flg; 2263273d9f13SBarry Smith 2264273d9f13SBarry Smith PetscFunctionBegin; 226538f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 226682502324SSatish Balay B->data = (void*)b; 226782502324SSatish Balay 2268085a36d4SBarry Smith 2269273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2270273d9f13SBarry Smith B->mapping = 0; 2271273d9f13SBarry Smith B->factor = 0; 2272273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2273273d9f13SBarry Smith 2274273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2275273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2276273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2277273d9f13SBarry Smith 2278273d9f13SBarry Smith /* build local table of row and column ownerships */ 2279899cda47SBarry Smith ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2280273d9f13SBarry Smith 2281273d9f13SBarry Smith /* build cache for off array entries formed */ 2282273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2283273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2284273d9f13SBarry Smith b->colmap = PETSC_NULL; 2285273d9f13SBarry Smith b->garray = PETSC_NULL; 2286273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2287273d9f13SBarry Smith 2288cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2289273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 229064a35ccbSBarry Smith b->setvalueslen = 0; 2291273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2292273d9f13SBarry Smith #endif 2293273d9f13SBarry Smith 2294273d9f13SBarry Smith /* stuff used in block assembly */ 2295273d9f13SBarry Smith b->barray = 0; 2296273d9f13SBarry Smith 2297273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2298273d9f13SBarry Smith b->lvec = 0; 2299273d9f13SBarry Smith b->Mvctx = 0; 2300273d9f13SBarry Smith 2301273d9f13SBarry Smith /* stuff for MatGetRow() */ 2302273d9f13SBarry Smith b->rowindices = 0; 2303273d9f13SBarry Smith b->rowvalues = 0; 2304273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2305273d9f13SBarry Smith 2306273d9f13SBarry Smith /* hash table stuff */ 2307273d9f13SBarry Smith b->ht = 0; 2308273d9f13SBarry Smith b->hd = 0; 2309273d9f13SBarry Smith b->ht_size = 0; 2310273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2311273d9f13SBarry Smith b->ht_fact = 0; 2312273d9f13SBarry Smith b->ht_total_ct = 0; 2313273d9f13SBarry Smith b->ht_insert_ct = 0; 2314273d9f13SBarry Smith 231577925062SSatish Balay ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 23168c07d4e3SBarry Smith ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2317273d9f13SBarry Smith if (flg) { 2318f6275e2eSBarry Smith PetscReal fact = 1.39; 2319*4e0d8c25SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 23208c07d4e3SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2321273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2322273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2323ae15b995SBarry Smith ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2324273d9f13SBarry Smith } 23258c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 23268c07d4e3SBarry Smith 2327273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2328273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2329273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2330273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2331273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2332273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2333273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2334273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2335273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2336a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2337a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2338a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2339aac34f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 234044ec7894SLisandro Dalcin "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2341aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 234292b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 234392b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 234492b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 23455bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 23465bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 23475bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 234817667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2349273d9f13SBarry Smith PetscFunctionReturn(0); 2350273d9f13SBarry Smith } 2351273d9f13SBarry Smith EXTERN_C_END 2352273d9f13SBarry Smith 2353209238afSKris Buschelman /*MC 2354002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2355209238afSKris Buschelman 2356209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2357209238afSKris Buschelman and MATMPIBAIJ otherwise. 2358209238afSKris Buschelman 2359209238afSKris Buschelman Options Database Keys: 2360209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2361209238afSKris Buschelman 2362209238afSKris Buschelman Level: beginner 2363209238afSKris Buschelman 2364aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2365209238afSKris Buschelman M*/ 2366209238afSKris Buschelman 2367209238afSKris Buschelman EXTERN_C_BEGIN 2368209238afSKris Buschelman #undef __FUNCT__ 2369209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ" 2370be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2371dfbe8321SBarry Smith { 23726849ba73SBarry Smith PetscErrorCode ierr; 2373b24ad042SBarry Smith PetscMPIInt size; 2374209238afSKris Buschelman 2375209238afSKris Buschelman PetscFunctionBegin; 2376209238afSKris Buschelman ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2377209238afSKris Buschelman if (size == 1) { 2378209238afSKris Buschelman ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2379209238afSKris Buschelman } else { 2380209238afSKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2381209238afSKris Buschelman } 2382209238afSKris Buschelman PetscFunctionReturn(0); 2383209238afSKris Buschelman } 2384209238afSKris Buschelman EXTERN_C_END 2385209238afSKris Buschelman 23864a2ae208SSatish Balay #undef __FUNCT__ 23874a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2388273d9f13SBarry Smith /*@C 2389aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2390273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2391273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2392273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2393273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2394273d9f13SBarry Smith 2395273d9f13SBarry Smith Collective on Mat 2396273d9f13SBarry Smith 2397273d9f13SBarry Smith Input Parameters: 2398273d9f13SBarry Smith + A - the matrix 2399273d9f13SBarry Smith . bs - size of blockk 2400273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2401273d9f13SBarry Smith submatrix (same for all local rows) 2402273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2403273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2404273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2405273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2406273d9f13SBarry Smith submatrix (same for all local rows). 2407273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2408273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2409273d9f13SBarry Smith each block row) or PETSC_NULL. 2410273d9f13SBarry Smith 241149a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 2412273d9f13SBarry Smith 2413273d9f13SBarry Smith Options Database Keys: 24148c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 24158c07d4e3SBarry Smith - -mat_use_hash_table <fact> 2416273d9f13SBarry Smith 2417273d9f13SBarry Smith Notes: 2418273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2419273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2420273d9f13SBarry Smith 2421273d9f13SBarry Smith Storage Information: 2422273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2423273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2424273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2425273d9f13SBarry Smith local matrix (a rectangular submatrix). 2426273d9f13SBarry Smith 2427273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2428273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2429273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2430273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2431273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2432273d9f13SBarry Smith 2433273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2434273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2435273d9f13SBarry Smith 2436273d9f13SBarry Smith .vb 2437273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2438273d9f13SBarry Smith ------------------- 2439273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2440273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2441273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2442273d9f13SBarry Smith ------------------- 2443273d9f13SBarry Smith .ve 2444273d9f13SBarry Smith 2445273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2446273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2447273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2448273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2449273d9f13SBarry Smith 2450273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2451273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2452273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2453273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2454273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2455273d9f13SBarry Smith matrices. 2456273d9f13SBarry Smith 2457273d9f13SBarry Smith Level: intermediate 2458273d9f13SBarry Smith 2459273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2460273d9f13SBarry Smith 2461aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2462273d9f13SBarry Smith @*/ 2463be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2464273d9f13SBarry Smith { 2465b24ad042SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2466273d9f13SBarry Smith 2467273d9f13SBarry Smith PetscFunctionBegin; 2468a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2469a23d5eceSKris Buschelman if (f) { 2470a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2471273d9f13SBarry Smith } 2472273d9f13SBarry Smith PetscFunctionReturn(0); 2473273d9f13SBarry Smith } 2474273d9f13SBarry Smith 24754a2ae208SSatish Balay #undef __FUNCT__ 24764a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 247779bdfe76SSatish Balay /*@C 247879bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 247979bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 248079bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 248179bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 248279bdfe76SSatish Balay performance can be increased by more than a factor of 50. 248379bdfe76SSatish Balay 2484db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2485db81eaa0SLois Curfman McInnes 248679bdfe76SSatish Balay Input Parameters: 2487db81eaa0SLois Curfman McInnes + comm - MPI communicator 248879bdfe76SSatish Balay . bs - size of blockk 248979bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 249092e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 249192e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 249292e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 249392e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 249492e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2495be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2496be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 249747a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 249879bdfe76SSatish Balay submatrix (same for all local rows) 249947a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 250092e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2501db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 250247a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 250379bdfe76SSatish Balay submatrix (same for all local rows). 250447a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 250592e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 250692e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 250779bdfe76SSatish Balay 250879bdfe76SSatish Balay Output Parameter: 250979bdfe76SSatish Balay . A - the matrix 251079bdfe76SSatish Balay 2511db81eaa0SLois Curfman McInnes Options Database Keys: 25128c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 25138c07d4e3SBarry Smith - -mat_use_hash_table <fact> 25143ffaccefSLois Curfman McInnes 2515b259b22eSLois Curfman McInnes Notes: 251649a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 251749a6f317SBarry Smith 251847a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 251947a75d0bSBarry Smith 252079bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 252179bdfe76SSatish Balay (possibly both). 252279bdfe76SSatish Balay 2523be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2524be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2525be79a94dSBarry Smith 252679bdfe76SSatish Balay Storage Information: 252779bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 252879bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 252979bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 253079bdfe76SSatish Balay local matrix (a rectangular submatrix). 253179bdfe76SSatish Balay 253279bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 253379bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 253479bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 253579bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 253679bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 253779bdfe76SSatish Balay 253879bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 253979bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 254079bdfe76SSatish Balay 2541db81eaa0SLois Curfman McInnes .vb 2542db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2543db81eaa0SLois Curfman McInnes ------------------- 2544db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2545db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2546db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2547db81eaa0SLois Curfman McInnes ------------------- 2548db81eaa0SLois Curfman McInnes .ve 254979bdfe76SSatish Balay 255079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 255179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 255279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 255357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 255479bdfe76SSatish Balay 2555d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2556d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 255779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 255892e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 255992e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 25606da5968aSLois Curfman McInnes matrices. 256179bdfe76SSatish Balay 2562027ccd11SLois Curfman McInnes Level: intermediate 2563027ccd11SLois Curfman McInnes 256492e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 256579bdfe76SSatish Balay 2566aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 256779bdfe76SSatish Balay @*/ 2568be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 256979bdfe76SSatish Balay { 25706849ba73SBarry Smith PetscErrorCode ierr; 2571b24ad042SBarry Smith PetscMPIInt size; 257279bdfe76SSatish Balay 2573d64ed03dSBarry Smith PetscFunctionBegin; 2574f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2575f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2576d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2577273d9f13SBarry Smith if (size > 1) { 2578273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2579273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2580273d9f13SBarry Smith } else { 2581273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2582273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 25833914022bSBarry Smith } 25843a40ed3dSBarry Smith PetscFunctionReturn(0); 258579bdfe76SSatish Balay } 2586026e39d0SSatish Balay 25874a2ae208SSatish Balay #undef __FUNCT__ 25884a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 25896849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 25900ac07820SSatish Balay { 25910ac07820SSatish Balay Mat mat; 25920ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2593dfbe8321SBarry Smith PetscErrorCode ierr; 2594b24ad042SBarry Smith PetscInt len=0; 25950ac07820SSatish Balay 2596d64ed03dSBarry Smith PetscFunctionBegin; 25970ac07820SSatish Balay *newmat = 0; 2598f69a0ea3SMatthew Knepley ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2599899cda47SBarry Smith ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2600be5d1d56SKris Buschelman ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 26011d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 26027fff6886SHong Zhang 26034beb1cfeSHong Zhang mat->factor = matin->factor; 2604273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 26050ac07820SSatish Balay mat->assembled = PETSC_TRUE; 26067fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 26077fff6886SHong Zhang 2608273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 2609899cda47SBarry Smith mat->rmap.bs = matin->rmap.bs; 26100ac07820SSatish Balay a->bs2 = oldmat->bs2; 26110ac07820SSatish Balay a->mbs = oldmat->mbs; 26120ac07820SSatish Balay a->nbs = oldmat->nbs; 26130ac07820SSatish Balay a->Mbs = oldmat->Mbs; 26140ac07820SSatish Balay a->Nbs = oldmat->Nbs; 26150ac07820SSatish Balay 2616899cda47SBarry Smith ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2617899cda47SBarry Smith ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2618899cda47SBarry Smith 26190ac07820SSatish Balay a->size = oldmat->size; 26200ac07820SSatish Balay a->rank = oldmat->rank; 2621aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2622aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2623aef5e8e0SSatish Balay a->rowindices = 0; 26240ac07820SSatish Balay a->rowvalues = 0; 26250ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 262630793edcSSatish Balay a->barray = 0; 2627899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2628899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2629899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2630899cda47SBarry Smith a->cendbs = oldmat->cendbs; 26310ac07820SSatish Balay 2632133cdb44SSatish Balay /* hash table stuff */ 2633133cdb44SSatish Balay a->ht = 0; 2634133cdb44SSatish Balay a->hd = 0; 2635133cdb44SSatish Balay a->ht_size = 0; 2636133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 263725fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2638133cdb44SSatish Balay a->ht_total_ct = 0; 2639133cdb44SSatish Balay a->ht_insert_ct = 0; 2640133cdb44SSatish Balay 2641899cda47SBarry Smith ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 26428798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2643899cda47SBarry Smith ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 26440ac07820SSatish Balay if (oldmat->colmap) { 2645aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 26460f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 264748e59246SSatish Balay #else 2648b24ad042SBarry Smith ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 264952e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2650b24ad042SBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 265148e59246SSatish Balay #endif 26520ac07820SSatish Balay } else a->colmap = 0; 26534beb1cfeSHong Zhang 26540ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2655b24ad042SBarry Smith ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 265652e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2657b24ad042SBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 26580ac07820SSatish Balay } else a->garray = 0; 26590ac07820SSatish Balay 26600ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 266152e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 26620ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 266352e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 26647fff6886SHong Zhang 26652e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 266652e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 26672e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 266852e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2669b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 26700ac07820SSatish Balay *newmat = mat; 26714beb1cfeSHong Zhang 26723a40ed3dSBarry Smith PetscFunctionReturn(0); 26730ac07820SSatish Balay } 267457b952d6SSatish Balay 2675e090d566SSatish Balay #include "petscsys.h" 267657b952d6SSatish Balay 26774a2ae208SSatish Balay #undef __FUNCT__ 26784a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2679f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 268057b952d6SSatish Balay { 268157b952d6SSatish Balay Mat A; 26826849ba73SBarry Smith PetscErrorCode ierr; 2683b24ad042SBarry Smith int fd; 2684b24ad042SBarry Smith PetscInt i,nz,j,rstart,rend; 268587828ca2SBarry Smith PetscScalar *vals,*buf; 268657b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 268757b952d6SSatish Balay MPI_Status status; 2688b24ad042SBarry Smith PetscMPIInt rank,size,maxnz; 2689b24ad042SBarry Smith PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2690910ba992SMatthew Knepley PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2691167e7480SBarry Smith PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2692dc231df0SBarry Smith PetscMPIInt tag = ((PetscObject)viewer)->tag; 2693910ba992SMatthew Knepley PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2694dc231df0SBarry Smith PetscInt dcount,kmax,k,nzcount,tmp,mend; 269557b952d6SSatish Balay 2696d64ed03dSBarry Smith PetscFunctionBegin; 269777925062SSatish Balay ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 26988c07d4e3SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 26998c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 270057b952d6SSatish Balay 2701d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2702d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 270357b952d6SSatish Balay if (!rank) { 2704b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2705e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2706552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 27076c5fab8fSBarry Smith } 2708d64ed03dSBarry Smith 2709b24ad042SBarry Smith ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 271057b952d6SSatish Balay M = header[1]; N = header[2]; 271157b952d6SSatish Balay 271229bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 271357b952d6SSatish Balay 271457b952d6SSatish Balay /* 271557b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 271657b952d6SSatish Balay divisible by the blocksize 271757b952d6SSatish Balay */ 271857b952d6SSatish Balay Mbs = M/bs; 2719dc231df0SBarry Smith extra_rows = bs - M + bs*Mbs; 272057b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 272157b952d6SSatish Balay else Mbs++; 272257b952d6SSatish Balay if (extra_rows && !rank) { 2723ae15b995SBarry Smith ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 272457b952d6SSatish Balay } 2725537820f0SBarry Smith 272657b952d6SSatish Balay /* determine ownership of all rows */ 272757b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 272857b952d6SSatish Balay m = mbs*bs; 2729dc231df0SBarry Smith ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2730b24ad042SBarry Smith ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2731167e7480SBarry Smith 2732167e7480SBarry Smith /* process 0 needs enough room for process with most rows */ 2733167e7480SBarry Smith if (!rank) { 2734167e7480SBarry Smith mmax = rowners[1]; 2735167e7480SBarry Smith for (i=2; i<size; i++) { 2736167e7480SBarry Smith mmax = PetscMax(mmax,rowners[i]); 2737167e7480SBarry Smith } 2738ca02efcfSSatish Balay mmax*=bs; 2739167e7480SBarry Smith } else mmax = m; 2740167e7480SBarry Smith 274157b952d6SSatish Balay rowners[0] = 0; 2742cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2743cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 274457b952d6SSatish Balay rstart = rowners[rank]; 274557b952d6SSatish Balay rend = rowners[rank+1]; 274657b952d6SSatish Balay 274757b952d6SSatish Balay /* distribute row lengths to all processors */ 274819c38ff2SBarry Smith ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 274957b952d6SSatish Balay if (!rank) { 2750dc231df0SBarry Smith mend = m; 2751dc231df0SBarry Smith if (size == 1) mend = mend - extra_rows; 2752dc231df0SBarry Smith ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2753dc231df0SBarry Smith for (j=mend; j<m; j++) locrowlens[j] = 1; 2754dc231df0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2755b24ad042SBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2756b24ad042SBarry Smith ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2757dc231df0SBarry Smith for (j=0; j<m; j++) { 2758dc231df0SBarry Smith procsnz[0] += locrowlens[j]; 2759dc231df0SBarry Smith } 2760dc231df0SBarry Smith for (i=1; i<size; i++) { 2761dc231df0SBarry Smith mend = browners[i+1] - browners[i]; 2762dc231df0SBarry Smith if (i == size-1) mend = mend - extra_rows; 2763dc231df0SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2764dc231df0SBarry Smith for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2765dc231df0SBarry Smith /* calculate the number of nonzeros on each processor */ 2766dc231df0SBarry Smith for (j=0; j<browners[i+1]-browners[i]; j++) { 276757b952d6SSatish Balay procsnz[i] += rowlengths[j]; 276857b952d6SSatish Balay } 2769dc231df0SBarry Smith ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277057b952d6SSatish Balay } 2771606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2772dc231df0SBarry Smith } else { 2773dc231df0SBarry Smith ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2774dc231df0SBarry Smith } 277557b952d6SSatish Balay 2776dc231df0SBarry Smith if (!rank) { 277757b952d6SSatish Balay /* determine max buffer needed and allocate it */ 27788a8e0b3aSBarry Smith maxnz = procsnz[0]; 2779cdc0ba36SBarry Smith for (i=1; i<size; i++) { 278057b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 278157b952d6SSatish Balay } 2782b24ad042SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 278357b952d6SSatish Balay 278457b952d6SSatish Balay /* read in my part of the matrix column indices */ 278557b952d6SSatish Balay nz = procsnz[0]; 278619c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 278757b952d6SSatish Balay mycols = ibuf; 2788cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2789e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2790cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2791cee3aa6bSSatish Balay 279257b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 279357b952d6SSatish Balay for (i=1; i<size-1; i++) { 279457b952d6SSatish Balay nz = procsnz[i]; 2795e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2796b24ad042SBarry Smith ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 279757b952d6SSatish Balay } 279857b952d6SSatish Balay /* read in the stuff for the last proc */ 279957b952d6SSatish Balay if (size != 1) { 280057b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2801e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 280257b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2803b24ad042SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 280457b952d6SSatish Balay } 2805606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2806d64ed03dSBarry Smith } else { 280757b952d6SSatish Balay /* determine buffer space needed for message */ 280857b952d6SSatish Balay nz = 0; 280957b952d6SSatish Balay for (i=0; i<m; i++) { 281057b952d6SSatish Balay nz += locrowlens[i]; 281157b952d6SSatish Balay } 281219c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 281357b952d6SSatish Balay mycols = ibuf; 281457b952d6SSatish Balay /* receive message of column indices*/ 2815b24ad042SBarry Smith ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2816b24ad042SBarry Smith ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 281729bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 281857b952d6SSatish Balay } 281957b952d6SSatish Balay 282057b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2821dc231df0SBarry Smith ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2822dc231df0SBarry Smith ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2823dc231df0SBarry Smith ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2824dc231df0SBarry Smith ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2825dc231df0SBarry Smith ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 282657b952d6SSatish Balay rowcount = 0; nzcount = 0; 282757b952d6SSatish Balay for (i=0; i<mbs; i++) { 282857b952d6SSatish Balay dcount = 0; 282957b952d6SSatish Balay odcount = 0; 283057b952d6SSatish Balay for (j=0; j<bs; j++) { 283157b952d6SSatish Balay kmax = locrowlens[rowcount]; 283257b952d6SSatish Balay for (k=0; k<kmax; k++) { 283357b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 283457b952d6SSatish Balay if (!mask[tmp]) { 283557b952d6SSatish Balay mask[tmp] = 1; 283657b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 283757b952d6SSatish Balay else masked1[dcount++] = tmp; 283857b952d6SSatish Balay } 283957b952d6SSatish Balay } 284057b952d6SSatish Balay rowcount++; 284157b952d6SSatish Balay } 2842cee3aa6bSSatish Balay 284357b952d6SSatish Balay dlens[i] = dcount; 284457b952d6SSatish Balay odlens[i] = odcount; 2845cee3aa6bSSatish Balay 284657b952d6SSatish Balay /* zero out the mask elements we set */ 284757b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 284857b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 284957b952d6SSatish Balay } 2850cee3aa6bSSatish Balay 285157b952d6SSatish Balay /* create our matrix */ 2852f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2853f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 285478ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 285578ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 285678ae41b4SKris Buschelman 285778ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2858*4e0d8c25SBarry Smith ierr = MatSetOption(A,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr); 285957b952d6SSatish Balay 286057b952d6SSatish Balay if (!rank) { 286119c38ff2SBarry Smith ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 286257b952d6SSatish Balay /* read in my part of the matrix numerical values */ 286357b952d6SSatish Balay nz = procsnz[0]; 286457b952d6SSatish Balay vals = buf; 2865cee3aa6bSSatish Balay mycols = ibuf; 2866cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2867e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2868cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2869537820f0SBarry Smith 287057b952d6SSatish Balay /* insert into matrix */ 287157b952d6SSatish Balay jj = rstart*bs; 287257b952d6SSatish Balay for (i=0; i<m; i++) { 2873dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 287457b952d6SSatish Balay mycols += locrowlens[i]; 287557b952d6SSatish Balay vals += locrowlens[i]; 287657b952d6SSatish Balay jj++; 287757b952d6SSatish Balay } 287857b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 287957b952d6SSatish Balay for (i=1; i<size-1; i++) { 288057b952d6SSatish Balay nz = procsnz[i]; 288157b952d6SSatish Balay vals = buf; 2882e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2883ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 288457b952d6SSatish Balay } 288557b952d6SSatish Balay /* the last proc */ 288657b952d6SSatish Balay if (size != 1){ 288757b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2888cee3aa6bSSatish Balay vals = buf; 2889e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 289057b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2891ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 289257b952d6SSatish Balay } 2893606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2894d64ed03dSBarry Smith } else { 289557b952d6SSatish Balay /* receive numeric values */ 289619c38ff2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 289757b952d6SSatish Balay 289857b952d6SSatish Balay /* receive message of values*/ 289957b952d6SSatish Balay vals = buf; 2900cee3aa6bSSatish Balay mycols = ibuf; 2901ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2902ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 290329bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 290457b952d6SSatish Balay 290557b952d6SSatish Balay /* insert into matrix */ 290657b952d6SSatish Balay jj = rstart*bs; 2907cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2908dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 290957b952d6SSatish Balay mycols += locrowlens[i]; 291057b952d6SSatish Balay vals += locrowlens[i]; 291157b952d6SSatish Balay jj++; 291257b952d6SSatish Balay } 291357b952d6SSatish Balay } 2914606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2915606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2916606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2917dc231df0SBarry Smith ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2918dc231df0SBarry Smith ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2919dc231df0SBarry Smith ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 29206d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29216d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 292278ae41b4SKris Buschelman 292378ae41b4SKris Buschelman *newmat = A; 29243a40ed3dSBarry Smith PetscFunctionReturn(0); 292557b952d6SSatish Balay } 292657b952d6SSatish Balay 29274a2ae208SSatish Balay #undef __FUNCT__ 29284a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2929133cdb44SSatish Balay /*@ 2930133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2931133cdb44SSatish Balay 2932133cdb44SSatish Balay Input Parameters: 2933133cdb44SSatish Balay . mat - the matrix 2934133cdb44SSatish Balay . fact - factor 2935133cdb44SSatish Balay 2936fee21e36SBarry Smith Collective on Mat 2937fee21e36SBarry Smith 29388c890885SBarry Smith Level: advanced 29398c890885SBarry Smith 2940133cdb44SSatish Balay Notes: 29418c07d4e3SBarry Smith This can also be set by the command line option: -mat_use_hash_table <fact> 2942133cdb44SSatish Balay 2943133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2944133cdb44SSatish Balay 2945133cdb44SSatish Balay .seealso: MatSetOption() 2946133cdb44SSatish Balay @*/ 2947be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2948133cdb44SSatish Balay { 2949dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscReal); 29505bf65638SKris Buschelman 29515bf65638SKris Buschelman PetscFunctionBegin; 29525bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 29535bf65638SKris Buschelman if (f) { 29545bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 29555bf65638SKris Buschelman } 29565bf65638SKris Buschelman PetscFunctionReturn(0); 29575bf65638SKris Buschelman } 29585bf65638SKris Buschelman 2959be1d678aSKris Buschelman EXTERN_C_BEGIN 29605bf65638SKris Buschelman #undef __FUNCT__ 29615bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2962be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 29635bf65638SKris Buschelman { 296425fdafccSSatish Balay Mat_MPIBAIJ *baij; 2965133cdb44SSatish Balay 2966133cdb44SSatish Balay PetscFunctionBegin; 2967133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2968133cdb44SSatish Balay baij->ht_fact = fact; 2969133cdb44SSatish Balay PetscFunctionReturn(0); 2970133cdb44SSatish Balay } 2971be1d678aSKris Buschelman EXTERN_C_END 2972f2a5309cSSatish Balay 29734a2ae208SSatish Balay #undef __FUNCT__ 29744a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2975be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2976f2a5309cSSatish Balay { 2977f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2978f2a5309cSSatish Balay PetscFunctionBegin; 2979f2a5309cSSatish Balay *Ad = a->A; 2980f2a5309cSSatish Balay *Ao = a->B; 2981195d93cdSBarry Smith *colmap = a->garray; 2982f2a5309cSSatish Balay PetscFunctionReturn(0); 2983f2a5309cSSatish Balay } 2984