179bdfe76SSatish Balay 2e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 379bdfe76SSatish Balay 4dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 7b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 1593fea6afSBarry Smith 1693fea6afSBarry Smith /* UGLY, ugly, ugly 1787828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 1893fea6afSBarry Smith not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 1993fea6afSBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 2093fea6afSBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 2193fea6afSBarry Smith into the single precision data structures. 2293fea6afSBarry Smith */ 2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 24b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 2993fea6afSBarry Smith #else 306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 3593fea6afSBarry Smith #endif 363b2fbd54SBarry Smith 374a2ae208SSatish Balay #undef __FUNCT__ 384a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 407843d17aSBarry Smith { 417843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42dfbe8321SBarry Smith PetscErrorCode ierr; 43b24ad042SBarry Smith PetscInt i; 4487828ca2SBarry Smith PetscScalar *va,*vb; 457843d17aSBarry Smith Vec vtmp; 467843d17aSBarry Smith 477843d17aSBarry Smith PetscFunctionBegin; 487843d17aSBarry Smith 497843d17aSBarry Smith ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 501ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 517843d17aSBarry Smith 52ac355199SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 537843d17aSBarry Smith ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 541ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 557843d17aSBarry Smith 56273d9f13SBarry Smith for (i=0; i<A->m; i++){ 57273d9f13SBarry Smith if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 587843d17aSBarry Smith } 597843d17aSBarry Smith 601ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 611ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62ac355199SBarry Smith ierr = VecDestroy(vtmp);CHKERRQ(ierr); 637843d17aSBarry Smith 647843d17aSBarry Smith PetscFunctionReturn(0); 657843d17aSBarry Smith } 667843d17aSBarry Smith 677fc3c18eSBarry Smith EXTERN_C_BEGIN 684a2ae208SSatish Balay #undef __FUNCT__ 694a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70dfbe8321SBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 717fc3c18eSBarry Smith { 727fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73dfbe8321SBarry Smith PetscErrorCode ierr; 747fc3c18eSBarry Smith 757fc3c18eSBarry Smith PetscFunctionBegin; 767fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 777fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 787fc3c18eSBarry Smith PetscFunctionReturn(0); 797fc3c18eSBarry Smith } 807fc3c18eSBarry Smith EXTERN_C_END 817fc3c18eSBarry Smith 827fc3c18eSBarry Smith EXTERN_C_BEGIN 834a2ae208SSatish Balay #undef __FUNCT__ 844a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85dfbe8321SBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 867fc3c18eSBarry Smith { 877fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88dfbe8321SBarry Smith PetscErrorCode ierr; 897fc3c18eSBarry Smith 907fc3c18eSBarry Smith PetscFunctionBegin; 917fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 927fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 937fc3c18eSBarry Smith PetscFunctionReturn(0); 947fc3c18eSBarry Smith } 957fc3c18eSBarry Smith EXTERN_C_END 967fc3c18eSBarry Smith 97537820f0SBarry Smith /* 98537820f0SBarry Smith Local utility routine that creates a mapping from the global column 9957b952d6SSatish Balay number to the local number in the off-diagonal part of the local 10057b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 10157b952d6SSatish Balay length of colmap equals the global matrix length. 10257b952d6SSatish Balay */ 1034a2ae208SSatish Balay #undef __FUNCT__ 1044a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 10657b952d6SSatish Balay { 10757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 10857b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 1096849ba73SBarry Smith PetscErrorCode ierr; 110521d7252SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->bs; 11157b952d6SSatish Balay 112d64ed03dSBarry Smith PetscFunctionBegin; 113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 114f14a1c24SBarry Smith ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 11548e59246SSatish Balay for (i=0; i<nbs; i++){ 1160f5bd95cSBarry Smith ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 11748e59246SSatish Balay } 11848e59246SSatish Balay #else 119b24ad042SBarry Smith ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120b24ad042SBarry Smith PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt)); 121b24ad042SBarry Smith ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 12348e59246SSatish Balay #endif 1243a40ed3dSBarry Smith PetscFunctionReturn(0); 12557b952d6SSatish Balay } 12657b952d6SSatish Balay 12780c1aa95SSatish Balay #define CHUNKSIZE 10 12880c1aa95SSatish Balay 129f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 13080c1aa95SSatish Balay { \ 13180c1aa95SSatish Balay \ 13280c1aa95SSatish Balay brow = row/bs; \ 13380c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 13580c1aa95SSatish Balay bcol = col/bs; \ 13680c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 137ab26458aSBarry Smith low = 0; high = nrow; \ 138ab26458aSBarry Smith while (high-low > 3) { \ 139ab26458aSBarry Smith t = (low+high)/2; \ 140ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 141ab26458aSBarry Smith else low = t; \ 142ab26458aSBarry Smith } \ 143ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 14480c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 14580c1aa95SSatish Balay if (rp[_i] == bcol) { \ 14680c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 147eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 148eada6651SSatish Balay else *bap = value; \ 149ac7a638eSSatish Balay goto a_noinsert; \ 15080c1aa95SSatish Balay } \ 15180c1aa95SSatish Balay } \ 15289280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 15377431f27SBarry Smith else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 15480c1aa95SSatish Balay if (nrow >= rmax) { \ 15580c1aa95SSatish Balay /* there is no extra room in row, therefore enlarge */ \ 156b24ad042SBarry Smith PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 1573eda8832SBarry Smith MatScalar *new_a; \ 15880c1aa95SSatish Balay \ 15977431f27SBarry Smith if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 16089280ab3SLois Curfman McInnes \ 16180c1aa95SSatish Balay /* malloc new storage space */ \ 162b24ad042SBarry Smith len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 16382502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164b24ad042SBarry Smith new_j = (PetscInt*)(new_a + bs2*new_nz); \ 16580c1aa95SSatish Balay new_i = new_j + new_nz; \ 16680c1aa95SSatish Balay \ 16780c1aa95SSatish Balay /* copy over old data into new slots */ \ 16880c1aa95SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 16980c1aa95SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170b24ad042SBarry Smith ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 17180c1aa95SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172b24ad042SBarry Smith ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 1733eda8832SBarry Smith ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 17487828ca2SBarry Smith ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 1763eda8832SBarry Smith aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 17780c1aa95SSatish Balay /* free up old matrix storage */ \ 178606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179606d414cSSatish Balay if (!a->singlemalloc) { \ 180606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182606d414cSSatish Balay } \ 18380c1aa95SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 1847c922b88SBarry Smith a->singlemalloc = PETSC_TRUE; \ 18580c1aa95SSatish Balay \ 18680c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187ac7a638eSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188b24ad042SBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 18980c1aa95SSatish Balay a->maxnz += bs2*CHUNKSIZE; \ 19080c1aa95SSatish Balay a->reallocs++; \ 19180c1aa95SSatish Balay a->nz++; \ 19280c1aa95SSatish Balay } \ 19380c1aa95SSatish Balay N = nrow++ - 1; \ 19480c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 19580c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 19680c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1973eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 19880c1aa95SSatish Balay } \ 1993eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 20080c1aa95SSatish Balay rp[_i] = bcol; \ 20180c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 202ac7a638eSSatish Balay a_noinsert:; \ 20380c1aa95SSatish Balay ailen[brow] = nrow; \ 20480c1aa95SSatish Balay } 20557b952d6SSatish Balay 206ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207ac7a638eSSatish Balay { \ 208ac7a638eSSatish Balay brow = row/bs; \ 209ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 211ac7a638eSSatish Balay bcol = col/bs; \ 212ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 213ac7a638eSSatish Balay low = 0; high = nrow; \ 214ac7a638eSSatish Balay while (high-low > 3) { \ 215ac7a638eSSatish Balay t = (low+high)/2; \ 216ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 217ac7a638eSSatish Balay else low = t; \ 218ac7a638eSSatish Balay } \ 219ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 220ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 221ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 222ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 223ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 224ac7a638eSSatish Balay else *bap = value; \ 225ac7a638eSSatish Balay goto b_noinsert; \ 226ac7a638eSSatish Balay } \ 227ac7a638eSSatish Balay } \ 22889280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 22977431f27SBarry Smith else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 230ac7a638eSSatish Balay if (nrow >= rmax) { \ 231ac7a638eSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 232b24ad042SBarry Smith PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 2333eda8832SBarry Smith MatScalar *new_a; \ 234ac7a638eSSatish Balay \ 23577431f27SBarry Smith if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 23689280ab3SLois Curfman McInnes \ 237ac7a638eSSatish Balay /* malloc new storage space */ \ 238b24ad042SBarry Smith len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 23982502324SSatish Balay ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240b24ad042SBarry Smith new_j = (PetscInt*)(new_a + bs2*new_nz); \ 241ac7a638eSSatish Balay new_i = new_j + new_nz; \ 242ac7a638eSSatish Balay \ 243ac7a638eSSatish Balay /* copy over old data into new slots */ \ 244ac7a638eSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 24574c639caSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246b24ad042SBarry Smith ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 247ac7a638eSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248b24ad042SBarry Smith ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 2493eda8832SBarry Smith ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 2503eda8832SBarry Smith ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 2523eda8832SBarry Smith ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253ac7a638eSSatish Balay /* free up old matrix storage */ \ 254606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255606d414cSSatish Balay if (!b->singlemalloc) { \ 256606d414cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257606d414cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258606d414cSSatish Balay } \ 25974c639caSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 2607c922b88SBarry Smith b->singlemalloc = PETSC_TRUE; \ 261ac7a638eSSatish Balay \ 262ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263ac7a638eSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264b24ad042SBarry Smith PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 26574c639caSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 26674c639caSSatish Balay b->reallocs++; \ 26774c639caSSatish Balay b->nz++; \ 268ac7a638eSSatish Balay } \ 269ac7a638eSSatish Balay N = nrow++ - 1; \ 270ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 271ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 272ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 2733eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274ac7a638eSSatish Balay } \ 2753eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276ac7a638eSSatish Balay rp[_i] = bcol; \ 277ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 278ac7a638eSSatish Balay b_noinsert:; \ 279ac7a638eSSatish Balay bilen[brow] = nrow; \ 280ac7a638eSSatish Balay } 281ac7a638eSSatish Balay 28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 2834a2ae208SSatish Balay #undef __FUNCT__ 2844a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 285b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 28657b952d6SSatish Balay { 2876fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288dfbe8321SBarry Smith PetscErrorCode ierr; 289b24ad042SBarry Smith PetscInt i,N = m*n; 2906fa18ffdSBarry Smith MatScalar *vsingle; 29193fea6afSBarry Smith 29293fea6afSBarry Smith PetscFunctionBegin; 2936fa18ffdSBarry Smith if (N > b->setvalueslen) { 2946fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 29582502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 2966fa18ffdSBarry Smith b->setvalueslen = N; 2976fa18ffdSBarry Smith } 2986fa18ffdSBarry Smith vsingle = b->setvaluescopy; 2996fa18ffdSBarry Smith 30093fea6afSBarry Smith for (i=0; i<N; i++) { 30193fea6afSBarry Smith vsingle[i] = v[i]; 30293fea6afSBarry Smith } 30393fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 30493fea6afSBarry Smith PetscFunctionReturn(0); 30593fea6afSBarry Smith } 30693fea6afSBarry Smith 3074a2ae208SSatish Balay #undef __FUNCT__ 3084a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 309b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 31093fea6afSBarry Smith { 3116fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 312dfbe8321SBarry Smith PetscErrorCode ierr; 313b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 3146fa18ffdSBarry Smith MatScalar *vsingle; 31593fea6afSBarry Smith 31693fea6afSBarry Smith PetscFunctionBegin; 3176fa18ffdSBarry Smith if (N > b->setvalueslen) { 3186fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 31982502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3206fa18ffdSBarry Smith b->setvalueslen = N; 3216fa18ffdSBarry Smith } 3226fa18ffdSBarry Smith vsingle = b->setvaluescopy; 32393fea6afSBarry Smith for (i=0; i<N; i++) { 32493fea6afSBarry Smith vsingle[i] = v[i]; 32593fea6afSBarry Smith } 32693fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 32793fea6afSBarry Smith PetscFunctionReturn(0); 32893fea6afSBarry Smith } 3296fa18ffdSBarry Smith 3304a2ae208SSatish Balay #undef __FUNCT__ 3314a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 332b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 3336fa18ffdSBarry Smith { 3346fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 335dfbe8321SBarry Smith PetscErrorCode ierr; 336b24ad042SBarry Smith PetscInt i,N = m*n; 3376fa18ffdSBarry Smith MatScalar *vsingle; 3386fa18ffdSBarry Smith 3396fa18ffdSBarry Smith PetscFunctionBegin; 3406fa18ffdSBarry Smith if (N > b->setvalueslen) { 3416fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 34282502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3436fa18ffdSBarry Smith b->setvalueslen = N; 3446fa18ffdSBarry Smith } 3456fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3466fa18ffdSBarry Smith for (i=0; i<N; i++) { 3476fa18ffdSBarry Smith vsingle[i] = v[i]; 3486fa18ffdSBarry Smith } 3496fa18ffdSBarry Smith ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3506fa18ffdSBarry Smith PetscFunctionReturn(0); 3516fa18ffdSBarry Smith } 3526fa18ffdSBarry Smith 3534a2ae208SSatish Balay #undef __FUNCT__ 3544a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 355b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 3566fa18ffdSBarry Smith { 3576fa18ffdSBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 358dfbe8321SBarry Smith PetscErrorCode ierr; 359b24ad042SBarry Smith PetscInt i,N = m*n*b->bs2; 3606fa18ffdSBarry Smith MatScalar *vsingle; 3616fa18ffdSBarry Smith 3626fa18ffdSBarry Smith PetscFunctionBegin; 3636fa18ffdSBarry Smith if (N > b->setvalueslen) { 3646fa18ffdSBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 36582502324SSatish Balay ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 3666fa18ffdSBarry Smith b->setvalueslen = N; 3676fa18ffdSBarry Smith } 3686fa18ffdSBarry Smith vsingle = b->setvaluescopy; 3696fa18ffdSBarry Smith for (i=0; i<N; i++) { 3706fa18ffdSBarry Smith vsingle[i] = v[i]; 3716fa18ffdSBarry Smith } 3726fa18ffdSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 3736fa18ffdSBarry Smith PetscFunctionReturn(0); 3746fa18ffdSBarry Smith } 37593fea6afSBarry Smith #endif 37693fea6afSBarry Smith 3774a2ae208SSatish Balay #undef __FUNCT__ 378e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 379b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 38093fea6afSBarry Smith { 38157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 38293fea6afSBarry Smith MatScalar value; 383273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 384dfbe8321SBarry Smith PetscErrorCode ierr; 385b24ad042SBarry Smith PetscInt i,j,row,col; 386b24ad042SBarry Smith PetscInt rstart_orig=baij->rstart_bs; 387b24ad042SBarry Smith PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 388521d7252SBarry Smith PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 38957b952d6SSatish Balay 390eada6651SSatish Balay /* Some Variables required in the macro */ 39180c1aa95SSatish Balay Mat A = baij->A; 39280c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 393b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 3943eda8832SBarry Smith MatScalar *aa=a->a; 395ac7a638eSSatish Balay 396ac7a638eSSatish Balay Mat B = baij->B; 397ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 398b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 3993eda8832SBarry Smith MatScalar *ba=b->a; 400ac7a638eSSatish Balay 401b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 402b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 4033eda8832SBarry Smith MatScalar *ap,*bap; 40480c1aa95SSatish Balay 405d64ed03dSBarry Smith PetscFunctionBegin; 40657b952d6SSatish Balay for (i=0; i<m; i++) { 4075ef9f2a5SBarry Smith if (im[i] < 0) continue; 4082515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 40977431f27SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 410639f9d9dSBarry Smith #endif 41157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 41257b952d6SSatish Balay row = im[i] - rstart_orig; 41357b952d6SSatish Balay for (j=0; j<n; j++) { 41457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 41557b952d6SSatish Balay col = in[j] - cstart_orig; 41657b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 417f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 41880c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 41973959e64SBarry Smith } else if (in[j] < 0) continue; 4202515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 42177431f27SBarry Smith else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 422639f9d9dSBarry Smith #endif 42357b952d6SSatish Balay else { 42457b952d6SSatish Balay if (mat->was_assembled) { 425905e6a2fSBarry Smith if (!baij->colmap) { 426905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 427905e6a2fSBarry Smith } 428aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 4290f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 430bba1ac68SSatish Balay col = col - 1; 43148e59246SSatish Balay #else 432bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 43348e59246SSatish Balay #endif 43457b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 43557b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4368295de27SSatish Balay col = in[j]; 4379bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 4389bf004c3SSatish Balay B = baij->B; 4399bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 4409bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 4419bf004c3SSatish Balay ba=b->a; 442bba1ac68SSatish Balay } else col += in[j]%bs; 4438295de27SSatish Balay } else col = in[j]; 44457b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 44590da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 44690da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 44757b952d6SSatish Balay } 44857b952d6SSatish Balay } 449d64ed03dSBarry Smith } else { 45090f02eecSBarry Smith if (!baij->donotstash) { 451ff2fd236SBarry Smith if (roworiented) { 4526fa18ffdSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453ff2fd236SBarry Smith } else { 4546fa18ffdSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 45557b952d6SSatish Balay } 45657b952d6SSatish Balay } 45757b952d6SSatish Balay } 45890f02eecSBarry Smith } 4593a40ed3dSBarry Smith PetscFunctionReturn(0); 46057b952d6SSatish Balay } 46157b952d6SSatish Balay 4624a2ae208SSatish Balay #undef __FUNCT__ 4634a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 464b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 465ab26458aSBarry Smith { 466ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467f15d580aSBarry Smith const MatScalar *value; 468f15d580aSBarry Smith MatScalar *barray=baij->barray; 469273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 470dfbe8321SBarry Smith PetscErrorCode ierr; 471b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 472b24ad042SBarry Smith PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 473521d7252SBarry Smith PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 474ab26458aSBarry Smith 475b16ae2b1SBarry Smith PetscFunctionBegin; 47630793edcSSatish Balay if(!barray) { 47782502324SSatish Balay ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 47882502324SSatish Balay baij->barray = barray; 47930793edcSSatish Balay } 48030793edcSSatish Balay 481ab26458aSBarry Smith if (roworiented) { 482ab26458aSBarry Smith stepval = (n-1)*bs; 483ab26458aSBarry Smith } else { 484ab26458aSBarry Smith stepval = (m-1)*bs; 485ab26458aSBarry Smith } 486ab26458aSBarry Smith for (i=0; i<m; i++) { 4875ef9f2a5SBarry Smith if (im[i] < 0) continue; 4882515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 48977431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 490ab26458aSBarry Smith #endif 491ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 492ab26458aSBarry Smith row = im[i] - rstart; 493ab26458aSBarry Smith for (j=0; j<n; j++) { 49415b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 49515b57d14SSatish Balay if ((roworiented) && (n == 1)) { 496f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 49715b57d14SSatish Balay } else if((!roworiented) && (m == 1)) { 498f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 49915b57d14SSatish Balay } else { /* Here a copy is required */ 500ab26458aSBarry Smith if (roworiented) { 501ab26458aSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 502ab26458aSBarry Smith } else { 503ab26458aSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 504abef11f7SSatish Balay } 50547513183SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 50647513183SBarry Smith for (jj=0; jj<bs; jj++) { 50730793edcSSatish Balay *barray++ = *value++; 50847513183SBarry Smith } 50947513183SBarry Smith } 51030793edcSSatish Balay barray -=bs2; 51115b57d14SSatish Balay } 512abef11f7SSatish Balay 513abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend){ 514abef11f7SSatish Balay col = in[j] - cstart; 51593fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516ab26458aSBarry Smith } 5175ef9f2a5SBarry Smith else if (in[j] < 0) continue; 5182515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 51977431f27SBarry Smith else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 520ab26458aSBarry Smith #endif 521ab26458aSBarry Smith else { 522ab26458aSBarry Smith if (mat->was_assembled) { 523ab26458aSBarry Smith if (!baij->colmap) { 524ab26458aSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 525ab26458aSBarry Smith } 526a5eb4965SSatish Balay 5272515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 528aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 529b24ad042SBarry Smith { PetscInt data; 5300f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 53129bbc08cSBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 532fa46199cSSatish Balay } 53348e59246SSatish Balay #else 53429bbc08cSBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 535a5eb4965SSatish Balay #endif 53648e59246SSatish Balay #endif 537aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 5380f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 539fa46199cSSatish Balay col = (col - 1)/bs; 54048e59246SSatish Balay #else 541a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 54248e59246SSatish Balay #endif 543ab26458aSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 544ab26458aSBarry Smith ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 545ab26458aSBarry Smith col = in[j]; 546ab26458aSBarry Smith } 547ab26458aSBarry Smith } 548ab26458aSBarry Smith else col = in[j]; 54993fea6afSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 550ab26458aSBarry Smith } 551ab26458aSBarry Smith } 552d64ed03dSBarry Smith } else { 553ab26458aSBarry Smith if (!baij->donotstash) { 554ff2fd236SBarry Smith if (roworiented) { 5556fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556ff2fd236SBarry Smith } else { 5576fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 558ff2fd236SBarry Smith } 559abef11f7SSatish Balay } 560ab26458aSBarry Smith } 561ab26458aSBarry Smith } 5623a40ed3dSBarry Smith PetscFunctionReturn(0); 563ab26458aSBarry Smith } 5646fa18ffdSBarry Smith 5650bdbc534SSatish Balay #define HASH_KEY 0.6180339887 566b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 567b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 568b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 5694a2ae208SSatish Balay #undef __FUNCT__ 5704a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 571b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 5720bdbc534SSatish Balay { 5730bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 574273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 575dfbe8321SBarry Smith PetscErrorCode ierr; 576b24ad042SBarry Smith PetscInt i,j,row,col; 577b24ad042SBarry Smith PetscInt rstart_orig=baij->rstart_bs; 578b24ad042SBarry Smith PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 579521d7252SBarry Smith PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 580329f5518SBarry Smith PetscReal tmp; 5813eda8832SBarry Smith MatScalar **HD = baij->hd,value; 5822515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 583b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5844a15367fSSatish Balay #endif 5850bdbc534SSatish Balay 5860bdbc534SSatish Balay PetscFunctionBegin; 5870bdbc534SSatish Balay 5880bdbc534SSatish Balay for (i=0; i<m; i++) { 5892515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 59029bbc08cSBarry Smith if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 59177431f27SBarry Smith if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 5920bdbc534SSatish Balay #endif 5930bdbc534SSatish Balay row = im[i]; 594c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 5950bdbc534SSatish Balay for (j=0; j<n; j++) { 5960bdbc534SSatish Balay col = in[j]; 5976fa18ffdSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 598b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 599c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 600c2760754SSatish Balay h1 = HASH(size,key,tmp); 6010bdbc534SSatish Balay 602c2760754SSatish Balay 603c2760754SSatish Balay idx = h1; 6042515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 605187ce0cbSSatish Balay insert_ct++; 606187ce0cbSSatish Balay total_ct++; 607187ce0cbSSatish Balay if (HT[idx] != key) { 608187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 609187ce0cbSSatish Balay if (idx == size) { 610187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 611187ce0cbSSatish Balay if (idx == h1) { 61277431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 613187ce0cbSSatish Balay } 614187ce0cbSSatish Balay } 615187ce0cbSSatish Balay } 616187ce0cbSSatish Balay #else 617c2760754SSatish Balay if (HT[idx] != key) { 618c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 619c2760754SSatish Balay if (idx == size) { 620c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 621c2760754SSatish Balay if (idx == h1) { 62277431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623c2760754SSatish Balay } 624c2760754SSatish Balay } 625c2760754SSatish Balay } 626187ce0cbSSatish Balay #endif 627c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 628c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 629c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 6300bdbc534SSatish Balay } 6310bdbc534SSatish Balay } else { 6320bdbc534SSatish Balay if (!baij->donotstash) { 633ff2fd236SBarry Smith if (roworiented) { 6348798bf22SSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 635ff2fd236SBarry Smith } else { 6368798bf22SSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 6370bdbc534SSatish Balay } 6380bdbc534SSatish Balay } 6390bdbc534SSatish Balay } 6400bdbc534SSatish Balay } 6412515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 642187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 643187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 644187ce0cbSSatish Balay #endif 6450bdbc534SSatish Balay PetscFunctionReturn(0); 6460bdbc534SSatish Balay } 6470bdbc534SSatish Balay 6484a2ae208SSatish Balay #undef __FUNCT__ 6494a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 650b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 6510bdbc534SSatish Balay { 6520bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 653273d9f13SBarry Smith PetscTruth roworiented = baij->roworiented; 654dfbe8321SBarry Smith PetscErrorCode ierr; 655b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col; 656b24ad042SBarry Smith PetscInt rstart=baij->rstart ; 657521d7252SBarry Smith PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 658b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659329f5518SBarry Smith PetscReal tmp; 6603eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 661f15d580aSBarry Smith const MatScalar *v_t,*value; 6622515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 663b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 6644a15367fSSatish Balay #endif 6650bdbc534SSatish Balay 666d0a41580SSatish Balay PetscFunctionBegin; 667d0a41580SSatish Balay 6680bdbc534SSatish Balay if (roworiented) { 6690bdbc534SSatish Balay stepval = (n-1)*bs; 6700bdbc534SSatish Balay } else { 6710bdbc534SSatish Balay stepval = (m-1)*bs; 6720bdbc534SSatish Balay } 6730bdbc534SSatish Balay for (i=0; i<m; i++) { 6742515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 67577431f27SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 67677431f27SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 6770bdbc534SSatish Balay #endif 6780bdbc534SSatish Balay row = im[i]; 679187ce0cbSSatish Balay v_t = v + i*bs2; 680c2760754SSatish Balay if (row >= rstart && row < rend) { 6810bdbc534SSatish Balay for (j=0; j<n; j++) { 6820bdbc534SSatish Balay col = in[j]; 6830bdbc534SSatish Balay 6840bdbc534SSatish Balay /* Look up into the Hash Table */ 685c2760754SSatish Balay key = row*Nbs+col+1; 686c2760754SSatish Balay h1 = HASH(size,key,tmp); 6870bdbc534SSatish Balay 688c2760754SSatish Balay idx = h1; 6892515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 690187ce0cbSSatish Balay total_ct++; 691187ce0cbSSatish Balay insert_ct++; 692187ce0cbSSatish Balay if (HT[idx] != key) { 693187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694187ce0cbSSatish Balay if (idx == size) { 695187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696187ce0cbSSatish Balay if (idx == h1) { 69777431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 698187ce0cbSSatish Balay } 699187ce0cbSSatish Balay } 700187ce0cbSSatish Balay } 701187ce0cbSSatish Balay #else 702c2760754SSatish Balay if (HT[idx] != key) { 703c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704c2760754SSatish Balay if (idx == size) { 705c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706c2760754SSatish Balay if (idx == h1) { 70777431f27SBarry Smith SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 708c2760754SSatish Balay } 709c2760754SSatish Balay } 710c2760754SSatish Balay } 711187ce0cbSSatish Balay #endif 712c2760754SSatish Balay baij_a = HD[idx]; 7130bdbc534SSatish Balay if (roworiented) { 714c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 716187ce0cbSSatish Balay value = v_t; 717187ce0cbSSatish Balay v_t += bs; 718fef45726SSatish Balay if (addv == ADD_VALUES) { 719c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 720c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 721fef45726SSatish Balay baij_a[jj] += *value++; 722b4cc0f5aSSatish Balay } 723b4cc0f5aSSatish Balay } 724fef45726SSatish Balay } else { 725c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 726c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 727fef45726SSatish Balay baij_a[jj] = *value++; 728fef45726SSatish Balay } 729fef45726SSatish Balay } 730fef45726SSatish Balay } 7310bdbc534SSatish Balay } else { 7320bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 733fef45726SSatish Balay if (addv == ADD_VALUES) { 734b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 7350bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 736fef45726SSatish Balay baij_a[jj] += *value++; 737fef45726SSatish Balay } 738fef45726SSatish Balay } 739fef45726SSatish Balay } else { 740fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 742fef45726SSatish Balay baij_a[jj] = *value++; 743fef45726SSatish Balay } 744b4cc0f5aSSatish Balay } 7450bdbc534SSatish Balay } 7460bdbc534SSatish Balay } 7470bdbc534SSatish Balay } 7480bdbc534SSatish Balay } else { 7490bdbc534SSatish Balay if (!baij->donotstash) { 7500bdbc534SSatish Balay if (roworiented) { 7518798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7520bdbc534SSatish Balay } else { 7538798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 7540bdbc534SSatish Balay } 7550bdbc534SSatish Balay } 7560bdbc534SSatish Balay } 7570bdbc534SSatish Balay } 7582515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 759187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 760187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 761187ce0cbSSatish Balay #endif 7620bdbc534SSatish Balay PetscFunctionReturn(0); 7630bdbc534SSatish Balay } 764133cdb44SSatish Balay 7654a2ae208SSatish Balay #undef __FUNCT__ 7664a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 767b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 768d6de1c52SSatish Balay { 769d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 7706849ba73SBarry Smith PetscErrorCode ierr; 771521d7252SBarry Smith PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 772b24ad042SBarry Smith PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773d6de1c52SSatish Balay 774133cdb44SSatish Balay PetscFunctionBegin; 775d6de1c52SSatish Balay for (i=0; i<m; i++) { 77677431f27SBarry Smith if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 77777431f27SBarry Smith if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779d6de1c52SSatish Balay row = idxm[i] - bsrstart; 780d6de1c52SSatish Balay for (j=0; j<n; j++) { 78177431f27SBarry Smith if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 78277431f27SBarry Smith if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 784d6de1c52SSatish Balay col = idxn[j] - bscstart; 78598dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786d64ed03dSBarry Smith } else { 787905e6a2fSBarry Smith if (!baij->colmap) { 788905e6a2fSBarry Smith ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789905e6a2fSBarry Smith } 790aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 7910f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792fa46199cSSatish Balay data --; 79348e59246SSatish Balay #else 79448e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 79548e59246SSatish Balay #endif 79648e59246SSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797d9d09a02SSatish Balay else { 79848e59246SSatish Balay col = data + idxn[j]%bs; 79998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800d6de1c52SSatish Balay } 801d6de1c52SSatish Balay } 802d6de1c52SSatish Balay } 803d64ed03dSBarry Smith } else { 80429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805d6de1c52SSatish Balay } 806d6de1c52SSatish Balay } 8073a40ed3dSBarry Smith PetscFunctionReturn(0); 808d6de1c52SSatish Balay } 809d6de1c52SSatish Balay 8104a2ae208SSatish Balay #undef __FUNCT__ 8114a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 812dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813d6de1c52SSatish Balay { 814d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816dfbe8321SBarry Smith PetscErrorCode ierr; 817b24ad042SBarry Smith PetscInt i,bs2=baij->bs2; 818329f5518SBarry Smith PetscReal sum = 0.0; 8193eda8832SBarry Smith MatScalar *v; 820d6de1c52SSatish Balay 821d64ed03dSBarry Smith PetscFunctionBegin; 822d6de1c52SSatish Balay if (baij->size == 1) { 823064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824d6de1c52SSatish Balay } else { 825d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 826d6de1c52SSatish Balay v = amat->a; 827d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++) { 828aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 829329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 830d6de1c52SSatish Balay #else 831d6de1c52SSatish Balay sum += (*v)*(*v); v++; 832d6de1c52SSatish Balay #endif 833d6de1c52SSatish Balay } 834d6de1c52SSatish Balay v = bmat->a; 835d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++) { 836aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 837329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838d6de1c52SSatish Balay #else 839d6de1c52SSatish Balay sum += (*v)*(*v); v++; 840d6de1c52SSatish Balay #endif 841d6de1c52SSatish Balay } 842064f8208SBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843064f8208SBarry Smith *nrm = sqrt(*nrm); 844d64ed03dSBarry Smith } else { 84529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 846d6de1c52SSatish Balay } 847d64ed03dSBarry Smith } 8483a40ed3dSBarry Smith PetscFunctionReturn(0); 849d6de1c52SSatish Balay } 85057b952d6SSatish Balay 851bd7f49f5SSatish Balay 852fef45726SSatish Balay /* 853fef45726SSatish Balay Creates the hash table, and sets the table 854fef45726SSatish Balay This table is created only once. 855fef45726SSatish Balay If new entried need to be added to the matrix 856fef45726SSatish Balay then the hash table has to be destroyed and 857fef45726SSatish Balay recreated. 858fef45726SSatish Balay */ 8594a2ae208SSatish Balay #undef __FUNCT__ 8604a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 861dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 862596b8d2eSBarry Smith { 863596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 864596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 865596b8d2eSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 866b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 8676849ba73SBarry Smith PetscErrorCode ierr; 868b24ad042SBarry Smith PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 869b24ad042SBarry Smith PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 870b24ad042SBarry Smith PetscInt *HT,key; 8713eda8832SBarry Smith MatScalar **HD; 872329f5518SBarry Smith PetscReal tmp; 8732515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 874b24ad042SBarry Smith PetscInt ct=0,max=0; 8754a15367fSSatish Balay #endif 876fef45726SSatish Balay 877d64ed03dSBarry Smith PetscFunctionBegin; 878b24ad042SBarry Smith baij->ht_size=(PetscInt)(factor*nz); 8790bdbc534SSatish Balay size = baij->ht_size; 880fef45726SSatish Balay 8810bdbc534SSatish Balay if (baij->ht) { 8820bdbc534SSatish Balay PetscFunctionReturn(0); 883596b8d2eSBarry Smith } 8840bdbc534SSatish Balay 885fef45726SSatish Balay /* Allocate Memory for Hash Table */ 886b24ad042SBarry Smith ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 887b24ad042SBarry Smith baij->ht = (PetscInt*)(baij->hd + size); 888b9e4cc15SSatish Balay HD = baij->hd; 889a07cd24cSSatish Balay HT = baij->ht; 890b9e4cc15SSatish Balay 891b9e4cc15SSatish Balay 892b24ad042SBarry Smith ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 8930bdbc534SSatish Balay 894596b8d2eSBarry Smith 895596b8d2eSBarry Smith /* Loop Over A */ 8960bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 897596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 8980bdbc534SSatish Balay row = i+rstart; 8990bdbc534SSatish Balay col = aj[j]+cstart; 900596b8d2eSBarry Smith 901187ce0cbSSatish Balay key = row*Nbs + col + 1; 902c2760754SSatish Balay h1 = HASH(size,key,tmp); 9030bdbc534SSatish Balay for (k=0; k<size; k++){ 904958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9050bdbc534SSatish Balay HT[(h1+k)%size] = key; 9060bdbc534SSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 907596b8d2eSBarry Smith break; 9082515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 909187ce0cbSSatish Balay } else { 910187ce0cbSSatish Balay ct++; 911187ce0cbSSatish Balay #endif 912596b8d2eSBarry Smith } 913187ce0cbSSatish Balay } 9142515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 915187ce0cbSSatish Balay if (k> max) max = k; 916187ce0cbSSatish Balay #endif 917596b8d2eSBarry Smith } 918596b8d2eSBarry Smith } 919596b8d2eSBarry Smith /* Loop Over B */ 9200bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 921596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 9220bdbc534SSatish Balay row = i+rstart; 9230bdbc534SSatish Balay col = garray[bj[j]]; 924187ce0cbSSatish Balay key = row*Nbs + col + 1; 925c2760754SSatish Balay h1 = HASH(size,key,tmp); 9260bdbc534SSatish Balay for (k=0; k<size; k++){ 927958c9bccSBarry Smith if (!HT[(h1+k)%size]) { 9280bdbc534SSatish Balay HT[(h1+k)%size] = key; 9290bdbc534SSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 930596b8d2eSBarry Smith break; 9312515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 932187ce0cbSSatish Balay } else { 933187ce0cbSSatish Balay ct++; 934187ce0cbSSatish Balay #endif 935596b8d2eSBarry Smith } 936187ce0cbSSatish Balay } 9372515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 938187ce0cbSSatish Balay if (k> max) max = k; 939187ce0cbSSatish Balay #endif 940596b8d2eSBarry Smith } 941596b8d2eSBarry Smith } 942596b8d2eSBarry Smith 943596b8d2eSBarry Smith /* Print Summary */ 9442515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 945c38d4ed2SBarry Smith for (i=0,j=0; i<size; i++) { 946596b8d2eSBarry Smith if (HT[i]) {j++;} 947c38d4ed2SBarry Smith } 94877431f27SBarry Smith PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max); 949187ce0cbSSatish Balay #endif 9503a40ed3dSBarry Smith PetscFunctionReturn(0); 951596b8d2eSBarry Smith } 95257b952d6SSatish Balay 9534a2ae208SSatish Balay #undef __FUNCT__ 9544a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 955dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 956bbb85fb3SSatish Balay { 957bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 958dfbe8321SBarry Smith PetscErrorCode ierr; 959b24ad042SBarry Smith PetscInt nstash,reallocs; 960bbb85fb3SSatish Balay InsertMode addv; 961bbb85fb3SSatish Balay 962bbb85fb3SSatish Balay PetscFunctionBegin; 963bbb85fb3SSatish Balay if (baij->donotstash) { 964bbb85fb3SSatish Balay PetscFunctionReturn(0); 965bbb85fb3SSatish Balay } 966bbb85fb3SSatish Balay 967bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 968bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 969bbb85fb3SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 97029bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 971bbb85fb3SSatish Balay } 972bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 973bbb85fb3SSatish Balay 9748798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 9758798bf22SSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 9768798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 97777431f27SBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs); 97846680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 97977431f27SBarry Smith PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 980bbb85fb3SSatish Balay PetscFunctionReturn(0); 981bbb85fb3SSatish Balay } 982bbb85fb3SSatish Balay 9834a2ae208SSatish Balay #undef __FUNCT__ 9844a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 985dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 986bbb85fb3SSatish Balay { 987bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 988a2d1c673SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 9896849ba73SBarry Smith PetscErrorCode ierr; 990b24ad042SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 991b24ad042SBarry Smith PetscInt *row,*col,other_disassembled; 9927c922b88SBarry Smith PetscTruth r1,r2,r3; 9933eda8832SBarry Smith MatScalar *val; 994bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 995b24ad042SBarry Smith PetscMPIInt n; 996bbb85fb3SSatish Balay 997bbb85fb3SSatish Balay PetscFunctionBegin; 998bbb85fb3SSatish Balay if (!baij->donotstash) { 999a2d1c673SSatish Balay while (1) { 10008798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1001a2d1c673SSatish Balay if (!flg) break; 1002a2d1c673SSatish Balay 1003bbb85fb3SSatish Balay for (i=0; i<n;) { 1004bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1005bbb85fb3SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1006bbb85fb3SSatish Balay if (j < n) ncols = j-i; 1007bbb85fb3SSatish Balay else ncols = n-i; 1008bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 100993fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1010bbb85fb3SSatish Balay i = j; 1011bbb85fb3SSatish Balay } 1012bbb85fb3SSatish Balay } 10138798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1014a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1015a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1016a2d1c673SSatish Balay restore the original flags */ 1017a2d1c673SSatish Balay r1 = baij->roworiented; 1018a2d1c673SSatish Balay r2 = a->roworiented; 1019a2d1c673SSatish Balay r3 = b->roworiented; 10207c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 10217c922b88SBarry Smith a->roworiented = PETSC_FALSE; 10227c922b88SBarry Smith b->roworiented = PETSC_FALSE; 1023a2d1c673SSatish Balay while (1) { 10248798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1025a2d1c673SSatish Balay if (!flg) break; 1026a2d1c673SSatish Balay 1027a2d1c673SSatish Balay for (i=0; i<n;) { 1028a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1029a2d1c673SSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1030a2d1c673SSatish Balay if (j < n) ncols = j-i; 1031a2d1c673SSatish Balay else ncols = n-i; 103293fea6afSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1033a2d1c673SSatish Balay i = j; 1034a2d1c673SSatish Balay } 1035a2d1c673SSatish Balay } 10368798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1037a2d1c673SSatish Balay baij->roworiented = r1; 1038a2d1c673SSatish Balay a->roworiented = r2; 1039a2d1c673SSatish Balay b->roworiented = r3; 1040bbb85fb3SSatish Balay } 1041bbb85fb3SSatish Balay 1042bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1043bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1044bbb85fb3SSatish Balay 1045bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 1046bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1047bbb85fb3SSatish Balay /* 1048bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 1049bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 1050bbb85fb3SSatish Balay */ 1051bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1052bbb85fb3SSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1053bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 1054bbb85fb3SSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1055bbb85fb3SSatish Balay } 1056bbb85fb3SSatish Balay } 1057bbb85fb3SSatish Balay 1058bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1059bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1060bbb85fb3SSatish Balay } 106173e7a558SHong Zhang b->compressedrow.use = PETSC_TRUE; 1062bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1063bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1064bbb85fb3SSatish Balay 10652515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 1066bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1067f6275e2eSBarry Smith PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1068bbb85fb3SSatish Balay baij->ht_total_ct = 0; 1069bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 1070bbb85fb3SSatish Balay } 1071bbb85fb3SSatish Balay #endif 1072bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1073bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1074bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1075bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1076bbb85fb3SSatish Balay } 1077bbb85fb3SSatish Balay 1078606d414cSSatish Balay if (baij->rowvalues) { 1079606d414cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1080606d414cSSatish Balay baij->rowvalues = 0; 1081606d414cSSatish Balay } 1082bbb85fb3SSatish Balay PetscFunctionReturn(0); 1083bbb85fb3SSatish Balay } 108457b952d6SSatish Balay 10854a2ae208SSatish Balay #undef __FUNCT__ 10864a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 10876849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 108857b952d6SSatish Balay { 108957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1090dfbe8321SBarry Smith PetscErrorCode ierr; 1091b24ad042SBarry Smith PetscMPIInt size = baij->size,rank = baij->rank; 1092521d7252SBarry Smith PetscInt bs = mat->bs; 109332077d6dSBarry Smith PetscTruth iascii,isdraw; 1094b0a32e0cSBarry Smith PetscViewer sviewer; 1095f3ef73ceSBarry Smith PetscViewerFormat format; 109657b952d6SSatish Balay 1097d64ed03dSBarry Smith PetscFunctionBegin; 1098f81bd7ccSHong Zhang /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 109932077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1100fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 110132077d6dSBarry Smith if (iascii) { 1102b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1103456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 11044e220ebcSLois Curfman McInnes MatInfo info; 1105d132466eSBarry Smith ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1106d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 110777431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 110877431f27SBarry Smith rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1109521d7252SBarry Smith mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1110d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 111177431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1112d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 111377431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1114b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 111557b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 11163a40ed3dSBarry Smith PetscFunctionReturn(0); 1117fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 111877431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 11193a40ed3dSBarry Smith PetscFunctionReturn(0); 112004929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 112104929863SHong Zhang PetscFunctionReturn(0); 112257b952d6SSatish Balay } 112357b952d6SSatish Balay } 112457b952d6SSatish Balay 11250f5bd95cSBarry Smith if (isdraw) { 1126b0a32e0cSBarry Smith PetscDraw draw; 112757b952d6SSatish Balay PetscTruth isnull; 1128b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1129b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 113057b952d6SSatish Balay } 113157b952d6SSatish Balay 113257b952d6SSatish Balay if (size == 1) { 1133873048abSBarry Smith ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 113457b952d6SSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1135d64ed03dSBarry Smith } else { 113657b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 113757b952d6SSatish Balay Mat A; 113857b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1139b24ad042SBarry Smith PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 11403eda8832SBarry Smith MatScalar *a; 114157b952d6SSatish Balay 1142f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1143f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 114457b952d6SSatish Balay if (!rank) { 1145f204ca49SKris Buschelman ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1146d64ed03dSBarry Smith } else { 1147f204ca49SKris Buschelman ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 114857b952d6SSatish Balay } 1149f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1150521d7252SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1151b0a32e0cSBarry Smith PetscLogObjectParent(mat,A); 115257b952d6SSatish Balay 115357b952d6SSatish Balay /* copy over the A part */ 115457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 115557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1156b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 115757b952d6SSatish Balay 115857b952d6SSatish Balay for (i=0; i<mbs; i++) { 115957b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 116057b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 116157b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 116257b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 116357b952d6SSatish Balay for (k=0; k<bs; k++) { 116493fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1165cee3aa6bSSatish Balay col++; a += bs; 116657b952d6SSatish Balay } 116757b952d6SSatish Balay } 116857b952d6SSatish Balay } 116957b952d6SSatish Balay /* copy over the B part */ 117057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 117157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 117257b952d6SSatish Balay for (i=0; i<mbs; i++) { 117357b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 117457b952d6SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 117557b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 117657b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 117757b952d6SSatish Balay for (k=0; k<bs; k++) { 117893fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1179cee3aa6bSSatish Balay col++; a += bs; 118057b952d6SSatish Balay } 118157b952d6SSatish Balay } 118257b952d6SSatish Balay } 1183606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 11846d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11856d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 118655843e3eSBarry Smith /* 118755843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1188b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 118955843e3eSBarry Smith */ 1190b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1191f14a1c24SBarry Smith if (!rank) { 1192e36acaf3SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 11936831982aSBarry Smith ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 119457b952d6SSatish Balay } 1195b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 119657b952d6SSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 119757b952d6SSatish Balay } 11983a40ed3dSBarry Smith PetscFunctionReturn(0); 119957b952d6SSatish Balay } 120057b952d6SSatish Balay 12014a2ae208SSatish Balay #undef __FUNCT__ 12024a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1203dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 120457b952d6SSatish Balay { 1205dfbe8321SBarry Smith PetscErrorCode ierr; 120632077d6dSBarry Smith PetscTruth iascii,isdraw,issocket,isbinary; 120757b952d6SSatish Balay 1208d64ed03dSBarry Smith PetscFunctionBegin; 120932077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1210fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1211b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1212fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 121332077d6dSBarry Smith if (iascii || isdraw || issocket || isbinary) { 12147b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 12155cd90555SBarry Smith } else { 121679a5c55eSBarry Smith SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 121757b952d6SSatish Balay } 12183a40ed3dSBarry Smith PetscFunctionReturn(0); 121957b952d6SSatish Balay } 122057b952d6SSatish Balay 12214a2ae208SSatish Balay #undef __FUNCT__ 12224a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1223dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 122479bdfe76SSatish Balay { 122579bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1226dfbe8321SBarry Smith PetscErrorCode ierr; 122779bdfe76SSatish Balay 1228d64ed03dSBarry Smith PetscFunctionBegin; 1229aa482453SBarry Smith #if defined(PETSC_USE_LOG) 123077431f27SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 123179bdfe76SSatish Balay #endif 12328798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12338798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1234606d414cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 123579bdfe76SSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 123679bdfe76SSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1237aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 12380f5bd95cSBarry Smith if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 123948e59246SSatish Balay #else 1240606d414cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 124148e59246SSatish Balay #endif 1242606d414cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1243606d414cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1244606d414cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1245606d414cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1246606d414cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1247606d414cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 12486fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 12496fa18ffdSBarry Smith if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 12506fa18ffdSBarry Smith #endif 1251606d414cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1252901853e0SKris Buschelman 1253901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1254901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1255901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1256901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1257aac34f13SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1258901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1259901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 12603a40ed3dSBarry Smith PetscFunctionReturn(0); 126179bdfe76SSatish Balay } 126279bdfe76SSatish Balay 12634a2ae208SSatish Balay #undef __FUNCT__ 12644a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1265dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1266cee3aa6bSSatish Balay { 1267cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1268dfbe8321SBarry Smith PetscErrorCode ierr; 1269b24ad042SBarry Smith PetscInt nt; 1270cee3aa6bSSatish Balay 1271d64ed03dSBarry Smith PetscFunctionBegin; 1272e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1273273d9f13SBarry Smith if (nt != A->n) { 127429bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 127547b4a8eaSLois Curfman McInnes } 1276e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1277273d9f13SBarry Smith if (nt != A->m) { 127829bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 127947b4a8eaSLois Curfman McInnes } 128043a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1281f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 128243a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1283f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 128443a90d84SBarry Smith ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 12853a40ed3dSBarry Smith PetscFunctionReturn(0); 1286cee3aa6bSSatish Balay } 1287cee3aa6bSSatish Balay 12884a2ae208SSatish Balay #undef __FUNCT__ 12894a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1290dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1291cee3aa6bSSatish Balay { 1292cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1293dfbe8321SBarry Smith PetscErrorCode ierr; 1294d64ed03dSBarry Smith 1295d64ed03dSBarry Smith PetscFunctionBegin; 129643a90d84SBarry Smith ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1297f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 129843a90d84SBarry Smith ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1299f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 13003a40ed3dSBarry Smith PetscFunctionReturn(0); 1301cee3aa6bSSatish Balay } 1302cee3aa6bSSatish Balay 13034a2ae208SSatish Balay #undef __FUNCT__ 13044a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1305dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1306cee3aa6bSSatish Balay { 1307cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1308dfbe8321SBarry Smith PetscErrorCode ierr; 1309*a5ff213dSBarry Smith PetscTruth merged; 1310cee3aa6bSSatish Balay 1311d64ed03dSBarry Smith PetscFunctionBegin; 1312*a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1313cee3aa6bSSatish Balay /* do nondiagonal part */ 13147c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1315*a5ff213dSBarry Smith if (!merged) { 1316cee3aa6bSSatish Balay /* send it on its way */ 1317537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1318cee3aa6bSSatish Balay /* do local part */ 13197c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1320cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1321*a5ff213dSBarry Smith /* inserted in yy until the next line */ 1322639f9d9dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1323*a5ff213dSBarry Smith } else { 1324*a5ff213dSBarry Smith /* do local part */ 1325*a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1326*a5ff213dSBarry Smith /* send it on its way */ 1327*a5ff213dSBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1328*a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 1329*a5ff213dSBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1330*a5ff213dSBarry Smith } 13313a40ed3dSBarry Smith PetscFunctionReturn(0); 1332cee3aa6bSSatish Balay } 1333cee3aa6bSSatish Balay 13344a2ae208SSatish Balay #undef __FUNCT__ 13354a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1336dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1337cee3aa6bSSatish Balay { 1338cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1339dfbe8321SBarry Smith PetscErrorCode ierr; 1340cee3aa6bSSatish Balay 1341d64ed03dSBarry Smith PetscFunctionBegin; 1342cee3aa6bSSatish Balay /* do nondiagonal part */ 13437c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1344cee3aa6bSSatish Balay /* send it on its way */ 1345537820f0SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1346cee3aa6bSSatish Balay /* do local part */ 13477c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1348cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1349cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1350cee3aa6bSSatish Balay /* but is not perhaps always true. */ 1351537820f0SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 13523a40ed3dSBarry Smith PetscFunctionReturn(0); 1353cee3aa6bSSatish Balay } 1354cee3aa6bSSatish Balay 1355cee3aa6bSSatish Balay /* 1356cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1357cee3aa6bSSatish Balay diagonal block 1358cee3aa6bSSatish Balay */ 13594a2ae208SSatish Balay #undef __FUNCT__ 13604a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1361dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1362cee3aa6bSSatish Balay { 1363cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1364dfbe8321SBarry Smith PetscErrorCode ierr; 1365d64ed03dSBarry Smith 1366d64ed03dSBarry Smith PetscFunctionBegin; 1367273d9f13SBarry Smith if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13683a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13693a40ed3dSBarry Smith PetscFunctionReturn(0); 1370cee3aa6bSSatish Balay } 1371cee3aa6bSSatish Balay 13724a2ae208SSatish Balay #undef __FUNCT__ 13734a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1374dfbe8321SBarry Smith PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1375cee3aa6bSSatish Balay { 1376cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1377dfbe8321SBarry Smith PetscErrorCode ierr; 1378d64ed03dSBarry Smith 1379d64ed03dSBarry Smith PetscFunctionBegin; 1380cee3aa6bSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1381cee3aa6bSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 13823a40ed3dSBarry Smith PetscFunctionReturn(0); 1383cee3aa6bSSatish Balay } 1384026e39d0SSatish Balay 13854a2ae208SSatish Balay #undef __FUNCT__ 13864a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1387b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1388acdf5bf4SSatish Balay { 1389acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 139087828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13916849ba73SBarry Smith PetscErrorCode ierr; 1392521d7252SBarry Smith PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1393b24ad042SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1394b24ad042SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstart; 1395acdf5bf4SSatish Balay 1396d64ed03dSBarry Smith PetscFunctionBegin; 1397abc0a331SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1398acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1399acdf5bf4SSatish Balay 1400acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1401acdf5bf4SSatish Balay /* 1402acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1403acdf5bf4SSatish Balay */ 1404acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1405b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1406bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1407acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1408acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 1409acdf5bf4SSatish Balay } 1410b24ad042SBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1411b24ad042SBarry Smith mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1412acdf5bf4SSatish Balay } 1413acdf5bf4SSatish Balay 141429bbc08cSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1415d9d09a02SSatish Balay lrow = row - brstart; 1416acdf5bf4SSatish Balay 1417acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1418acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1419acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1420f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1421f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1422acdf5bf4SSatish Balay nztot = nzA + nzB; 1423acdf5bf4SSatish Balay 1424acdf5bf4SSatish Balay cmap = mat->garray; 1425acdf5bf4SSatish Balay if (v || idx) { 1426acdf5bf4SSatish Balay if (nztot) { 1427acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1428b24ad042SBarry Smith PetscInt imark = -1; 1429acdf5bf4SSatish Balay if (v) { 1430acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1431acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1432d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1433acdf5bf4SSatish Balay else break; 1434acdf5bf4SSatish Balay } 1435acdf5bf4SSatish Balay imark = i; 1436acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1437acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1438acdf5bf4SSatish Balay } 1439acdf5bf4SSatish Balay if (idx) { 1440acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1441acdf5bf4SSatish Balay if (imark > -1) { 1442acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1443bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1444acdf5bf4SSatish Balay } 1445acdf5bf4SSatish Balay } else { 1446acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1447d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1448d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1449acdf5bf4SSatish Balay else break; 1450acdf5bf4SSatish Balay } 1451acdf5bf4SSatish Balay imark = i; 1452acdf5bf4SSatish Balay } 1453d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1454d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1455acdf5bf4SSatish Balay } 1456d64ed03dSBarry Smith } else { 1457d212a18eSSatish Balay if (idx) *idx = 0; 1458d212a18eSSatish Balay if (v) *v = 0; 1459d212a18eSSatish Balay } 1460acdf5bf4SSatish Balay } 1461acdf5bf4SSatish Balay *nz = nztot; 1462f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1463f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14643a40ed3dSBarry Smith PetscFunctionReturn(0); 1465acdf5bf4SSatish Balay } 1466acdf5bf4SSatish Balay 14674a2ae208SSatish Balay #undef __FUNCT__ 14684a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1469b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1470acdf5bf4SSatish Balay { 1471acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1472d64ed03dSBarry Smith 1473d64ed03dSBarry Smith PetscFunctionBegin; 1474abc0a331SBarry Smith if (!baij->getrowactive) { 147529bbc08cSBarry Smith SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1476acdf5bf4SSatish Balay } 1477acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14783a40ed3dSBarry Smith PetscFunctionReturn(0); 1479acdf5bf4SSatish Balay } 1480acdf5bf4SSatish Balay 14814a2ae208SSatish Balay #undef __FUNCT__ 14824a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1483dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 148458667388SSatish Balay { 148558667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1486dfbe8321SBarry Smith PetscErrorCode ierr; 1487d64ed03dSBarry Smith 1488d64ed03dSBarry Smith PetscFunctionBegin; 148958667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 149058667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14913a40ed3dSBarry Smith PetscFunctionReturn(0); 149258667388SSatish Balay } 14930ac07820SSatish Balay 14944a2ae208SSatish Balay #undef __FUNCT__ 14954a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1496dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14970ac07820SSatish Balay { 14984e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14994e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1500dfbe8321SBarry Smith PetscErrorCode ierr; 1501329f5518SBarry Smith PetscReal isend[5],irecv[5]; 15020ac07820SSatish Balay 1503d64ed03dSBarry Smith PetscFunctionBegin; 1504521d7252SBarry Smith info->block_size = (PetscReal)matin->bs; 15054e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 15060e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1507de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 15084e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 15090e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1510de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 15110ac07820SSatish Balay if (flag == MAT_LOCAL) { 15124e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15134e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 15144e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15154e220ebcSLois Curfman McInnes info->memory = isend[3]; 15164e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15170ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1518d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 15194e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15204e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15214e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15224e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15234e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15240ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1525d7d1e502SBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 15264e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15274e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15284e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15294e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15304e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1531d41123aaSBarry Smith } else { 153277431f27SBarry Smith SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 15330ac07820SSatish Balay } 1534f6275e2eSBarry Smith info->rows_global = (PetscReal)A->M; 1535f6275e2eSBarry Smith info->columns_global = (PetscReal)A->N; 1536f6275e2eSBarry Smith info->rows_local = (PetscReal)A->m; 1537f6275e2eSBarry Smith info->columns_local = (PetscReal)A->N; 15384e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15394e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15404e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15413a40ed3dSBarry Smith PetscFunctionReturn(0); 15420ac07820SSatish Balay } 15430ac07820SSatish Balay 15444a2ae208SSatish Balay #undef __FUNCT__ 15454a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1546dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 154758667388SSatish Balay { 154858667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1549dfbe8321SBarry Smith PetscErrorCode ierr; 155058667388SSatish Balay 1551d64ed03dSBarry Smith PetscFunctionBegin; 155212c028f9SKris Buschelman switch (op) { 155312c028f9SKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 155412c028f9SKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 155512c028f9SKris Buschelman case MAT_COLUMNS_UNSORTED: 155612c028f9SKris Buschelman case MAT_COLUMNS_SORTED: 155712c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 155812c028f9SKris Buschelman case MAT_KEEP_ZEROED_ROWS: 155912c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 156098305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 156198305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 156212c028f9SKris Buschelman break; 156312c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15647c922b88SBarry Smith a->roworiented = PETSC_TRUE; 156598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 156698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 156712c028f9SKris Buschelman break; 156812c028f9SKris Buschelman case MAT_ROWS_SORTED: 156912c028f9SKris Buschelman case MAT_ROWS_UNSORTED: 157012c028f9SKris Buschelman case MAT_YES_NEW_DIAGONALS: 1571b0a32e0cSBarry Smith PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 157212c028f9SKris Buschelman break; 157312c028f9SKris Buschelman case MAT_COLUMN_ORIENTED: 15747c922b88SBarry Smith a->roworiented = PETSC_FALSE; 157598305bb5SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 157698305bb5SBarry Smith ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 157712c028f9SKris Buschelman break; 157812c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15797c922b88SBarry Smith a->donotstash = PETSC_TRUE; 158012c028f9SKris Buschelman break; 158112c028f9SKris Buschelman case MAT_NO_NEW_DIAGONALS: 158229bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 158312c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15847c922b88SBarry Smith a->ht_flag = PETSC_TRUE; 158512c028f9SKris Buschelman break; 158677e54ba9SKris Buschelman case MAT_SYMMETRIC: 158777e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15882188ac68SBarry Smith case MAT_HERMITIAN: 15892188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 15902188ac68SBarry Smith ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 15912188ac68SBarry Smith break; 15929a4540c5SBarry Smith case MAT_NOT_SYMMETRIC: 15939a4540c5SBarry Smith case MAT_NOT_STRUCTURALLY_SYMMETRIC: 15949a4540c5SBarry Smith case MAT_NOT_HERMITIAN: 15959a4540c5SBarry Smith case MAT_NOT_SYMMETRY_ETERNAL: 159677e54ba9SKris Buschelman break; 159712c028f9SKris Buschelman default: 159829bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 1599d64ed03dSBarry Smith } 16003a40ed3dSBarry Smith PetscFunctionReturn(0); 160158667388SSatish Balay } 160258667388SSatish Balay 16034a2ae208SSatish Balay #undef __FUNCT__ 16044a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1605dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 16060ac07820SSatish Balay { 16070ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 16080ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 16090ac07820SSatish Balay Mat B; 1610dfbe8321SBarry Smith PetscErrorCode ierr; 1611b24ad042SBarry Smith PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1612521d7252SBarry Smith PetscInt bs=A->bs,mbs=baij->mbs; 16133eda8832SBarry Smith MatScalar *a; 16140ac07820SSatish Balay 1615d64ed03dSBarry Smith PetscFunctionBegin; 161629bbc08cSBarry Smith if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1617f204ca49SKris Buschelman ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1618f204ca49SKris Buschelman ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1619521d7252SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 16200ac07820SSatish Balay 16210ac07820SSatish Balay /* copy over the A part */ 16220ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16230ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1624b24ad042SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 16250ac07820SSatish Balay 16260ac07820SSatish Balay for (i=0; i<mbs; i++) { 16270ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16280ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16290ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16300ac07820SSatish Balay col = (baij->cstart+aj[j])*bs; 16310ac07820SSatish Balay for (k=0; k<bs; k++) { 163293fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16330ac07820SSatish Balay col++; a += bs; 16340ac07820SSatish Balay } 16350ac07820SSatish Balay } 16360ac07820SSatish Balay } 16370ac07820SSatish Balay /* copy over the B part */ 16380ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16390ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16400ac07820SSatish Balay for (i=0; i<mbs; i++) { 16410ac07820SSatish Balay rvals[0] = bs*(baij->rstart + i); 16420ac07820SSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 16430ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16440ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16450ac07820SSatish Balay for (k=0; k<bs; k++) { 164693fea6afSBarry Smith ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 16470ac07820SSatish Balay col++; a += bs; 16480ac07820SSatish Balay } 16490ac07820SSatish Balay } 16500ac07820SSatish Balay } 1651606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16520ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16530ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16540ac07820SSatish Balay 16557c922b88SBarry Smith if (matout) { 16560ac07820SSatish Balay *matout = B; 16570ac07820SSatish Balay } else { 1658273d9f13SBarry Smith ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 16590ac07820SSatish Balay } 16603a40ed3dSBarry Smith PetscFunctionReturn(0); 16610ac07820SSatish Balay } 16620e95ebc0SSatish Balay 16634a2ae208SSatish Balay #undef __FUNCT__ 16644a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1665dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16660e95ebc0SSatish Balay { 166736c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 166836c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1669dfbe8321SBarry Smith PetscErrorCode ierr; 1670b24ad042SBarry Smith PetscInt s1,s2,s3; 16710e95ebc0SSatish Balay 1672d64ed03dSBarry Smith PetscFunctionBegin; 167336c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 167436c4a09eSSatish Balay if (rr) { 167536c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 167629bbc08cSBarry Smith if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 167736c4a09eSSatish Balay /* Overlap communication with computation. */ 167836c4a09eSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 167936c4a09eSSatish Balay } 16800e95ebc0SSatish Balay if (ll) { 16810e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 168229bbc08cSBarry Smith if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1683a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 16840e95ebc0SSatish Balay } 168536c4a09eSSatish Balay /* scale the diagonal block */ 168636c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 168736c4a09eSSatish Balay 168836c4a09eSSatish Balay if (rr) { 168936c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 169036c4a09eSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1691a21fb8cbSBarry Smith ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 169236c4a09eSSatish Balay } 169336c4a09eSSatish Balay 16943a40ed3dSBarry Smith PetscFunctionReturn(0); 16950e95ebc0SSatish Balay } 16960e95ebc0SSatish Balay 16974a2ae208SSatish Balay #undef __FUNCT__ 16984a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1699dfbe8321SBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 17000ac07820SSatish Balay { 17010ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 17026849ba73SBarry Smith PetscErrorCode ierr; 1703b24ad042SBarry Smith PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1704b24ad042SBarry Smith PetscInt i,N,*rows,*owners = l->rowners; 1705b24ad042SBarry Smith PetscInt *nprocs,j,idx,nsends,row; 1706b24ad042SBarry Smith PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1707b24ad042SBarry Smith PetscInt *rvalues,tag = A->tag,count,base,slen,*source; 1708521d7252SBarry Smith PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 17090ac07820SSatish Balay MPI_Comm comm = A->comm; 17100ac07820SSatish Balay MPI_Request *send_waits,*recv_waits; 17110ac07820SSatish Balay MPI_Status recv_status,*send_status; 17120ac07820SSatish Balay IS istmp; 171335d8aa7fSBarry Smith PetscTruth found; 17140ac07820SSatish Balay 1715d64ed03dSBarry Smith PetscFunctionBegin; 1716f14a1c24SBarry Smith ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 17170ac07820SSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 17180ac07820SSatish Balay 17190ac07820SSatish Balay /* first count number of contributors to each processor */ 1720b24ad042SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1721b24ad042SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1722b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 17230ac07820SSatish Balay for (i=0; i<N; i++) { 17240ac07820SSatish Balay idx = rows[i]; 172535d8aa7fSBarry Smith found = PETSC_FALSE; 17260ac07820SSatish Balay for (j=0; j<size; j++) { 17270ac07820SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1728c1dc657dSBarry Smith nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 17290ac07820SSatish Balay } 17300ac07820SSatish Balay } 173129bbc08cSBarry Smith if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 17320ac07820SSatish Balay } 1733c1dc657dSBarry Smith nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 17340ac07820SSatish Balay 17350ac07820SSatish Balay /* inform other processors of number of messages and max length*/ 1736c1dc657dSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 17370ac07820SSatish Balay 17380ac07820SSatish Balay /* post receives: */ 1739b24ad042SBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1740b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 17410ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 1742b24ad042SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 17430ac07820SSatish Balay } 17440ac07820SSatish Balay 17450ac07820SSatish Balay /* do sends: 17460ac07820SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 17470ac07820SSatish Balay the ith processor 17480ac07820SSatish Balay */ 1749b24ad042SBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1750b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1751b24ad042SBarry Smith ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 17520ac07820SSatish Balay starts[0] = 0; 1753c1dc657dSBarry Smith for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17540ac07820SSatish Balay for (i=0; i<N; i++) { 17550ac07820SSatish Balay svalues[starts[owner[i]]++] = rows[i]; 17560ac07820SSatish Balay } 17576831982aSBarry Smith ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 17580ac07820SSatish Balay 17590ac07820SSatish Balay starts[0] = 0; 1760c1dc657dSBarry Smith for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 17610ac07820SSatish Balay count = 0; 17620ac07820SSatish Balay for (i=0; i<size; i++) { 1763c1dc657dSBarry Smith if (nprocs[2*i+1]) { 1764b24ad042SBarry Smith ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 17650ac07820SSatish Balay } 17660ac07820SSatish Balay } 1767606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 17680ac07820SSatish Balay 17690ac07820SSatish Balay base = owners[rank]*bs; 17700ac07820SSatish Balay 17710ac07820SSatish Balay /* wait on receives */ 1772b24ad042SBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 17730ac07820SSatish Balay source = lens + nrecvs; 17740ac07820SSatish Balay count = nrecvs; slen = 0; 17750ac07820SSatish Balay while (count) { 1776ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 17770ac07820SSatish Balay /* unpack receives into our local space */ 1778b24ad042SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 17790ac07820SSatish Balay source[imdex] = recv_status.MPI_SOURCE; 17800ac07820SSatish Balay lens[imdex] = n; 17810ac07820SSatish Balay slen += n; 17820ac07820SSatish Balay count--; 17830ac07820SSatish Balay } 1784606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 17850ac07820SSatish Balay 17860ac07820SSatish Balay /* move the data into the send scatter */ 1787b24ad042SBarry Smith ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 17880ac07820SSatish Balay count = 0; 17890ac07820SSatish Balay for (i=0; i<nrecvs; i++) { 17900ac07820SSatish Balay values = rvalues + i*nmax; 17910ac07820SSatish Balay for (j=0; j<lens[i]; j++) { 17920ac07820SSatish Balay lrows[count++] = values[j] - base; 17930ac07820SSatish Balay } 17940ac07820SSatish Balay } 1795606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1796606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1797606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1798606d414cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 17990ac07820SSatish Balay 18000ac07820SSatish Balay /* actually zap the local rows */ 1801029af93fSBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1802b0a32e0cSBarry Smith PetscLogObjectParent(A,istmp); 1803a07cd24cSSatish Balay 180472dacd9aSBarry Smith /* 180572dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 180672dacd9aSBarry Smith is square and the user wishes to set the diagonal we use seperate 180772dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 180872dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 180972dacd9aSBarry Smith 181072dacd9aSBarry Smith Contributed by: Mathew Knepley 181172dacd9aSBarry Smith */ 18129c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 18136fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 18149c957beeSSatish Balay if (diag && (l->A->M == l->A->N)) { 18156fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 18169c957beeSSatish Balay } else if (diag) { 18176fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1818fa46199cSSatish Balay if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 181929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1820fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 18216525c446SSatish Balay } 1822a07cd24cSSatish Balay for (i=0; i<slen; i++) { 1823a07cd24cSSatish Balay row = lrows[i] + rstart_bs; 18243f11ea53SBarry Smith ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1825a07cd24cSSatish Balay } 1826a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1827a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 18289c957beeSSatish Balay } else { 18296fa18ffdSBarry Smith ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1830a07cd24cSSatish Balay } 18319c957beeSSatish Balay 18329c957beeSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1833606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1834a07cd24cSSatish Balay 18350ac07820SSatish Balay /* wait on sends */ 18360ac07820SSatish Balay if (nsends) { 183782502324SSatish Balay ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1838ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1839606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 18400ac07820SSatish Balay } 1841606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1842606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 18430ac07820SSatish Balay 18443a40ed3dSBarry Smith PetscFunctionReturn(0); 18450ac07820SSatish Balay } 184672dacd9aSBarry Smith 18474a2ae208SSatish Balay #undef __FUNCT__ 18484a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1849dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1850ba4ca20aSSatish Balay { 1851ba4ca20aSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 185225fdafccSSatish Balay MPI_Comm comm = A->comm; 1853b24ad042SBarry Smith static PetscTruth called = PETSC_FALSE; 1854dfbe8321SBarry Smith PetscErrorCode ierr; 1855ba4ca20aSSatish Balay 1856d64ed03dSBarry Smith PetscFunctionBegin; 18573a40ed3dSBarry Smith if (!a->rank) { 18583a40ed3dSBarry Smith ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 185925fdafccSSatish Balay } 1860b24ad042SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1861d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1862d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 18633a40ed3dSBarry Smith PetscFunctionReturn(0); 1864ba4ca20aSSatish Balay } 18650ac07820SSatish Balay 18664a2ae208SSatish Balay #undef __FUNCT__ 18674a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1868dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1869bb5a7306SBarry Smith { 1870bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1871dfbe8321SBarry Smith PetscErrorCode ierr; 1872d64ed03dSBarry Smith 1873d64ed03dSBarry Smith PetscFunctionBegin; 1874bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 18753a40ed3dSBarry Smith PetscFunctionReturn(0); 1876bb5a7306SBarry Smith } 1877bb5a7306SBarry Smith 18786849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 18790ac07820SSatish Balay 18804a2ae208SSatish Balay #undef __FUNCT__ 18814a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1882dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 18837fc3c18eSBarry Smith { 18847fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 18857fc3c18eSBarry Smith Mat a,b,c,d; 18867fc3c18eSBarry Smith PetscTruth flg; 1887dfbe8321SBarry Smith PetscErrorCode ierr; 18887fc3c18eSBarry Smith 18897fc3c18eSBarry Smith PetscFunctionBegin; 18907fc3c18eSBarry Smith a = matA->A; b = matA->B; 18917fc3c18eSBarry Smith c = matB->A; d = matB->B; 18927fc3c18eSBarry Smith 18937fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1894abc0a331SBarry Smith if (flg) { 18957fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18967fc3c18eSBarry Smith } 18977fc3c18eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 18987fc3c18eSBarry Smith PetscFunctionReturn(0); 18997fc3c18eSBarry Smith } 19007fc3c18eSBarry Smith 1901273d9f13SBarry Smith 19024a2ae208SSatish Balay #undef __FUNCT__ 19034a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1904dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1905273d9f13SBarry Smith { 1906dfbe8321SBarry Smith PetscErrorCode ierr; 1907273d9f13SBarry Smith 1908273d9f13SBarry Smith PetscFunctionBegin; 1909273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1910273d9f13SBarry Smith PetscFunctionReturn(0); 1911273d9f13SBarry Smith } 1912273d9f13SBarry Smith 191379bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 1914cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = { 1915cc2dc46cSBarry Smith MatSetValues_MPIBAIJ, 1916cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 1917cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 1918cc2dc46cSBarry Smith MatMult_MPIBAIJ, 191997304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 19207c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 19217c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 1922cc2dc46cSBarry Smith 0, 1923cc2dc46cSBarry Smith 0, 1924cc2dc46cSBarry Smith 0, 192597304618SKris Buschelman /*10*/ 0, 1926cc2dc46cSBarry Smith 0, 1927cc2dc46cSBarry Smith 0, 1928cc2dc46cSBarry Smith 0, 1929cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 193097304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 19317fc3c18eSBarry Smith MatEqual_MPIBAIJ, 1932cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 1933cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 1934cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 193597304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 1936cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 1937cc2dc46cSBarry Smith 0, 1938cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 1939cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 194097304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ, 1941cc2dc46cSBarry Smith 0, 1942cc2dc46cSBarry Smith 0, 1943cc2dc46cSBarry Smith 0, 1944cc2dc46cSBarry Smith 0, 194597304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ, 1946273d9f13SBarry Smith 0, 1947cc2dc46cSBarry Smith 0, 1948cc2dc46cSBarry Smith 0, 1949cc2dc46cSBarry Smith 0, 195097304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ, 1951cc2dc46cSBarry Smith 0, 1952cc2dc46cSBarry Smith 0, 1953cc2dc46cSBarry Smith 0, 1954cc2dc46cSBarry Smith 0, 195597304618SKris Buschelman /*40*/ 0, 1956cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 1957cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 1958cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 1959cc2dc46cSBarry Smith 0, 196097304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ, 1961cc2dc46cSBarry Smith MatScale_MPIBAIJ, 1962cc2dc46cSBarry Smith 0, 1963cc2dc46cSBarry Smith 0, 1964cc2dc46cSBarry Smith 0, 1965521d7252SBarry Smith /*50*/ 0, 1966cc2dc46cSBarry Smith 0, 1967cc2dc46cSBarry Smith 0, 1968cc2dc46cSBarry Smith 0, 1969cc2dc46cSBarry Smith 0, 197097304618SKris Buschelman /*55*/ 0, 1971cc2dc46cSBarry Smith 0, 1972cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 1973cc2dc46cSBarry Smith 0, 1974cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 197597304618SKris Buschelman /*60*/ 0, 1976f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 1977f14a1c24SBarry Smith MatView_MPIBAIJ, 19788a124369SBarry Smith MatGetPetscMaps_Petsc, 19797843d17aSBarry Smith 0, 198097304618SKris Buschelman /*65*/ 0, 19817843d17aSBarry Smith 0, 19827843d17aSBarry Smith 0, 19837843d17aSBarry Smith 0, 19847843d17aSBarry Smith 0, 198597304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ, 19867843d17aSBarry Smith 0, 198797304618SKris Buschelman 0, 198897304618SKris Buschelman 0, 198997304618SKris Buschelman 0, 199097304618SKris Buschelman /*75*/ 0, 199197304618SKris Buschelman 0, 199297304618SKris Buschelman 0, 199397304618SKris Buschelman 0, 199497304618SKris Buschelman 0, 199597304618SKris Buschelman /*80*/ 0, 199697304618SKris Buschelman 0, 199797304618SKris Buschelman 0, 199897304618SKris Buschelman 0, 1999865e5f61SKris Buschelman MatLoad_MPIBAIJ, 2000865e5f61SKris Buschelman /*85*/ 0, 2001865e5f61SKris Buschelman 0, 2002865e5f61SKris Buschelman 0, 2003865e5f61SKris Buschelman 0, 2004865e5f61SKris Buschelman 0, 2005865e5f61SKris Buschelman /*90*/ 0, 2006865e5f61SKris Buschelman 0, 2007865e5f61SKris Buschelman 0, 2008865e5f61SKris Buschelman 0, 2009865e5f61SKris Buschelman 0, 2010865e5f61SKris Buschelman /*95*/ 0, 2011865e5f61SKris Buschelman 0, 2012865e5f61SKris Buschelman 0, 2013865e5f61SKris Buschelman 0}; 201479bdfe76SSatish Balay 20155ef9f2a5SBarry Smith 2016e18c124aSSatish Balay EXTERN_C_BEGIN 20174a2ae208SSatish Balay #undef __FUNCT__ 20184a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2019dfbe8321SBarry Smith PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 20205ef9f2a5SBarry Smith { 20215ef9f2a5SBarry Smith PetscFunctionBegin; 20225ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 20235ef9f2a5SBarry Smith *iscopy = PETSC_FALSE; 20245ef9f2a5SBarry Smith PetscFunctionReturn(0); 20255ef9f2a5SBarry Smith } 2026e18c124aSSatish Balay EXTERN_C_END 202779bdfe76SSatish Balay 2028273d9f13SBarry Smith EXTERN_C_BEGIN 2029d94109b8SHong Zhang extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 2030d94109b8SHong Zhang EXTERN_C_END 2031d94109b8SHong Zhang 2032aac34f13SBarry Smith #undef __FUNCT__ 2033aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2034aac34f13SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2035aac34f13SBarry Smith { 2036aac34f13SBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2037aac34f13SBarry Smith PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2038aac34f13SBarry Smith PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2039aac34f13SBarry Smith const PetscInt *JJ; 2040aac34f13SBarry Smith PetscScalar *values; 2041aac34f13SBarry Smith PetscErrorCode ierr; 2042aac34f13SBarry Smith 2043aac34f13SBarry Smith PetscFunctionBegin; 2044aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2045aac34f13SBarry Smith if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2046aac34f13SBarry Smith #endif 2047aac34f13SBarry Smith ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2048aac34f13SBarry Smith o_nnz = d_nnz + m; 2049aac34f13SBarry Smith 2050aac34f13SBarry Smith for (i=0; i<m; i++) { 2051aac34f13SBarry Smith nnz = I[i+1]- I[i]; 2052aac34f13SBarry Smith JJ = J + I[i]; 2053aac34f13SBarry Smith nnz_max = PetscMax(nnz_max,nnz); 2054aac34f13SBarry Smith #if defined(PETSC_OPT_g) 2055aac34f13SBarry Smith if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2056aac34f13SBarry Smith #endif 2057aac34f13SBarry Smith for (j=0; j<nnz; j++) { 2058aac34f13SBarry Smith if (*JJ >= cstart) break; 2059aac34f13SBarry Smith JJ++; 2060aac34f13SBarry Smith } 2061aac34f13SBarry Smith d = 0; 2062aac34f13SBarry Smith for (; j<nnz; j++) { 2063aac34f13SBarry Smith if (*JJ++ >= cend) break; 2064aac34f13SBarry Smith d++; 2065aac34f13SBarry Smith } 2066aac34f13SBarry Smith d_nnz[i] = d; 2067aac34f13SBarry Smith o_nnz[i] = nnz - d; 2068aac34f13SBarry Smith } 2069aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2070aac34f13SBarry Smith ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2071aac34f13SBarry Smith 2072aac34f13SBarry Smith if (v) values = (PetscScalar*)v; 2073aac34f13SBarry Smith else { 2074aac34f13SBarry Smith ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2075aac34f13SBarry Smith ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2076aac34f13SBarry Smith } 2077aac34f13SBarry Smith 2078aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2079aac34f13SBarry Smith for (i=0; i<m; i++) { 2080aac34f13SBarry Smith ii = i + rstart; 2081aac34f13SBarry Smith nnz = I[i+1]- I[i]; 2082aac34f13SBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2083aac34f13SBarry Smith } 2084aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2085aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086aac34f13SBarry Smith ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2087aac34f13SBarry Smith 2088aac34f13SBarry Smith if (!v) { 2089aac34f13SBarry Smith ierr = PetscFree(values);CHKERRQ(ierr); 2090aac34f13SBarry Smith } 2091aac34f13SBarry Smith PetscFunctionReturn(0); 2092aac34f13SBarry Smith } 2093aac34f13SBarry Smith 2094aac34f13SBarry Smith #undef __FUNCT__ 2095aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2096aac34f13SBarry Smith /*@C 2097aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2098aac34f13SBarry Smith (the default parallel PETSc format). 2099aac34f13SBarry Smith 2100aac34f13SBarry Smith Collective on MPI_Comm 2101aac34f13SBarry Smith 2102aac34f13SBarry Smith Input Parameters: 2103aac34f13SBarry Smith + A - the matrix 2104aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2105aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2106aac34f13SBarry Smith - v - optional values in the matrix 2107aac34f13SBarry Smith 2108aac34f13SBarry Smith Level: developer 2109aac34f13SBarry Smith 2110aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 2111aac34f13SBarry Smith 2112aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2113aac34f13SBarry Smith @*/ 2114aac34f13SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2115aac34f13SBarry Smith { 2116aac34f13SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2117aac34f13SBarry Smith 2118aac34f13SBarry Smith PetscFunctionBegin; 2119aac34f13SBarry Smith ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2120aac34f13SBarry Smith if (f) { 2121aac34f13SBarry Smith ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2122aac34f13SBarry Smith } 2123aac34f13SBarry Smith PetscFunctionReturn(0); 2124aac34f13SBarry Smith } 2125aac34f13SBarry Smith 2126d94109b8SHong Zhang EXTERN_C_BEGIN 21274a2ae208SSatish Balay #undef __FUNCT__ 2128a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2129b24ad042SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2130a23d5eceSKris Buschelman { 2131a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2132dfbe8321SBarry Smith PetscErrorCode ierr; 2133b24ad042SBarry Smith PetscInt i; 2134a23d5eceSKris Buschelman 2135a23d5eceSKris Buschelman PetscFunctionBegin; 2136a23d5eceSKris Buschelman B->preallocated = PETSC_TRUE; 2137a23d5eceSKris Buschelman ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2138a23d5eceSKris Buschelman 2139a23d5eceSKris Buschelman if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2140a23d5eceSKris Buschelman if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2141a23d5eceSKris Buschelman if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 214277431f27SBarry Smith if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 214377431f27SBarry Smith if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2144a23d5eceSKris Buschelman if (d_nnz) { 2145a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 214677431f27SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2147a23d5eceSKris Buschelman } 2148a23d5eceSKris Buschelman } 2149a23d5eceSKris Buschelman if (o_nnz) { 2150a23d5eceSKris Buschelman for (i=0; i<B->m/bs; i++) { 215177431f27SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2152a23d5eceSKris Buschelman } 2153a23d5eceSKris Buschelman } 2154a23d5eceSKris Buschelman 2155a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2156a23d5eceSKris Buschelman ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2157a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2158a23d5eceSKris Buschelman ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2159a23d5eceSKris Buschelman 2160a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2161521d7252SBarry Smith B->bs = bs; 2162a23d5eceSKris Buschelman b->bs2 = bs*bs; 2163a23d5eceSKris Buschelman b->mbs = B->m/bs; 2164a23d5eceSKris Buschelman b->nbs = B->n/bs; 2165a23d5eceSKris Buschelman b->Mbs = B->M/bs; 2166a23d5eceSKris Buschelman b->Nbs = B->N/bs; 2167a23d5eceSKris Buschelman 2168b24ad042SBarry Smith ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2169a23d5eceSKris Buschelman b->rowners[0] = 0; 2170a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2171a23d5eceSKris Buschelman b->rowners[i] += b->rowners[i-1]; 2172a23d5eceSKris Buschelman } 2173a23d5eceSKris Buschelman b->rstart = b->rowners[b->rank]; 2174a23d5eceSKris Buschelman b->rend = b->rowners[b->rank+1]; 2175a23d5eceSKris Buschelman 2176b24ad042SBarry Smith ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2177a23d5eceSKris Buschelman b->cowners[0] = 0; 2178a23d5eceSKris Buschelman for (i=2; i<=b->size; i++) { 2179a23d5eceSKris Buschelman b->cowners[i] += b->cowners[i-1]; 2180a23d5eceSKris Buschelman } 2181a23d5eceSKris Buschelman b->cstart = b->cowners[b->rank]; 2182a23d5eceSKris Buschelman b->cend = b->cowners[b->rank+1]; 2183a23d5eceSKris Buschelman 2184a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2185a23d5eceSKris Buschelman b->rowners_bs[i] = b->rowners[i]*bs; 2186a23d5eceSKris Buschelman } 2187a23d5eceSKris Buschelman b->rstart_bs = b->rstart*bs; 2188a23d5eceSKris Buschelman b->rend_bs = b->rend*bs; 2189a23d5eceSKris Buschelman b->cstart_bs = b->cstart*bs; 2190a23d5eceSKris Buschelman b->cend_bs = b->cend*bs; 2191a23d5eceSKris Buschelman 21929c097c71SKris Buschelman ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 21939c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2194c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 21959c097c71SKris Buschelman PetscLogObjectParent(B,b->A); 21969c097c71SKris Buschelman ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 21979c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2198c60e587dSKris Buschelman ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 21999c097c71SKris Buschelman PetscLogObjectParent(B,b->B); 2200c60e587dSKris Buschelman 2201a23d5eceSKris Buschelman ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2202a23d5eceSKris Buschelman 2203a23d5eceSKris Buschelman PetscFunctionReturn(0); 2204a23d5eceSKris Buschelman } 2205a23d5eceSKris Buschelman EXTERN_C_END 2206a23d5eceSKris Buschelman 2207a23d5eceSKris Buschelman EXTERN_C_BEGIN 2208dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2209dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 221092b32695SKris Buschelman EXTERN_C_END 22115bf65638SKris Buschelman 22120bad9183SKris Buschelman /*MC 2213fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 22140bad9183SKris Buschelman 22150bad9183SKris Buschelman Options Database Keys: 22160bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 22170bad9183SKris Buschelman 22180bad9183SKris Buschelman Level: beginner 22190bad9183SKris Buschelman 22200bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 22210bad9183SKris Buschelman M*/ 22220bad9183SKris Buschelman 222392b32695SKris Buschelman EXTERN_C_BEGIN 2224a23d5eceSKris Buschelman #undef __FUNCT__ 22254a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 2226dfbe8321SBarry Smith PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2227273d9f13SBarry Smith { 2228273d9f13SBarry Smith Mat_MPIBAIJ *b; 2229dfbe8321SBarry Smith PetscErrorCode ierr; 2230273d9f13SBarry Smith PetscTruth flg; 2231273d9f13SBarry Smith 2232273d9f13SBarry Smith PetscFunctionBegin; 223382502324SSatish Balay ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 223482502324SSatish Balay B->data = (void*)b; 223582502324SSatish Balay 2236273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2237273d9f13SBarry Smith B->mapping = 0; 2238273d9f13SBarry Smith B->factor = 0; 2239273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2240273d9f13SBarry Smith 2241273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 2242273d9f13SBarry Smith ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2243273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2244273d9f13SBarry Smith 2245273d9f13SBarry Smith /* build local table of row and column ownerships */ 2246b24ad042SBarry Smith ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2247b24ad042SBarry Smith PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2248273d9f13SBarry Smith b->cowners = b->rowners + b->size + 2; 2249273d9f13SBarry Smith b->rowners_bs = b->cowners + b->size + 2; 2250273d9f13SBarry Smith 2251273d9f13SBarry Smith /* build cache for off array entries formed */ 2252273d9f13SBarry Smith ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2253273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 2254273d9f13SBarry Smith b->colmap = PETSC_NULL; 2255273d9f13SBarry Smith b->garray = PETSC_NULL; 2256273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2257273d9f13SBarry Smith 2258cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 2259273d9f13SBarry Smith /* stuff for MatSetValues_XXX in single precision */ 226064a35ccbSBarry Smith b->setvalueslen = 0; 2261273d9f13SBarry Smith b->setvaluescopy = PETSC_NULL; 2262273d9f13SBarry Smith #endif 2263273d9f13SBarry Smith 2264273d9f13SBarry Smith /* stuff used in block assembly */ 2265273d9f13SBarry Smith b->barray = 0; 2266273d9f13SBarry Smith 2267273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2268273d9f13SBarry Smith b->lvec = 0; 2269273d9f13SBarry Smith b->Mvctx = 0; 2270273d9f13SBarry Smith 2271273d9f13SBarry Smith /* stuff for MatGetRow() */ 2272273d9f13SBarry Smith b->rowindices = 0; 2273273d9f13SBarry Smith b->rowvalues = 0; 2274273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2275273d9f13SBarry Smith 2276273d9f13SBarry Smith /* hash table stuff */ 2277273d9f13SBarry Smith b->ht = 0; 2278273d9f13SBarry Smith b->hd = 0; 2279273d9f13SBarry Smith b->ht_size = 0; 2280273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2281273d9f13SBarry Smith b->ht_fact = 0; 2282273d9f13SBarry Smith b->ht_total_ct = 0; 2283273d9f13SBarry Smith b->ht_insert_ct = 0; 2284273d9f13SBarry Smith 2285b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2286273d9f13SBarry Smith if (flg) { 2287f6275e2eSBarry Smith PetscReal fact = 1.39; 2288273d9f13SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 228987828ca2SBarry Smith ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2290273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 2291273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2292b0a32e0cSBarry Smith PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2293273d9f13SBarry Smith } 2294273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2295273d9f13SBarry Smith "MatStoreValues_MPIBAIJ", 2296273d9f13SBarry Smith MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2297273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2298273d9f13SBarry Smith "MatRetrieveValues_MPIBAIJ", 2299273d9f13SBarry Smith MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2300273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2301273d9f13SBarry Smith "MatGetDiagonalBlock_MPIBAIJ", 2302273d9f13SBarry Smith MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2303a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2304a23d5eceSKris Buschelman "MatMPIBAIJSetPreallocation_MPIBAIJ", 2305a23d5eceSKris Buschelman MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2306aac34f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2307aac34f13SBarry Smith "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2308aac34f13SBarry Smith MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 230992b32695SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 231092b32695SKris Buschelman "MatDiagonalScaleLocal_MPIBAIJ", 231192b32695SKris Buschelman MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 23125bf65638SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 23135bf65638SKris Buschelman "MatSetHashTableFactor_MPIBAIJ", 23145bf65638SKris Buschelman MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2315273d9f13SBarry Smith PetscFunctionReturn(0); 2316273d9f13SBarry Smith } 2317273d9f13SBarry Smith EXTERN_C_END 2318273d9f13SBarry Smith 2319209238afSKris Buschelman /*MC 2320002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2321209238afSKris Buschelman 2322209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2323209238afSKris Buschelman and MATMPIBAIJ otherwise. 2324209238afSKris Buschelman 2325209238afSKris Buschelman Options Database Keys: 2326209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2327209238afSKris Buschelman 2328209238afSKris Buschelman Level: beginner 2329209238afSKris Buschelman 2330aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2331209238afSKris Buschelman M*/ 2332209238afSKris Buschelman 2333209238afSKris Buschelman EXTERN_C_BEGIN 2334209238afSKris Buschelman #undef __FUNCT__ 2335209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ" 2336dfbe8321SBarry Smith PetscErrorCode MatCreate_BAIJ(Mat A) 2337dfbe8321SBarry Smith { 23386849ba73SBarry Smith PetscErrorCode ierr; 2339b24ad042SBarry Smith PetscMPIInt size; 2340209238afSKris Buschelman 2341209238afSKris Buschelman PetscFunctionBegin; 2342209238afSKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2343209238afSKris Buschelman ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2344209238afSKris Buschelman if (size == 1) { 2345209238afSKris Buschelman ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2346209238afSKris Buschelman } else { 2347209238afSKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2348209238afSKris Buschelman } 2349209238afSKris Buschelman PetscFunctionReturn(0); 2350209238afSKris Buschelman } 2351209238afSKris Buschelman EXTERN_C_END 2352209238afSKris Buschelman 23534a2ae208SSatish Balay #undef __FUNCT__ 23544a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2355273d9f13SBarry Smith /*@C 2356aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2357273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2358273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2359273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2360273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2361273d9f13SBarry Smith 2362273d9f13SBarry Smith Collective on Mat 2363273d9f13SBarry Smith 2364273d9f13SBarry Smith Input Parameters: 2365273d9f13SBarry Smith + A - the matrix 2366273d9f13SBarry Smith . bs - size of blockk 2367273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2368273d9f13SBarry Smith submatrix (same for all local rows) 2369273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2370273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 2371273d9f13SBarry Smith row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2372273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2373273d9f13SBarry Smith submatrix (same for all local rows). 2374273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2375273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 2376273d9f13SBarry Smith each block row) or PETSC_NULL. 2377273d9f13SBarry Smith 237849a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 2379273d9f13SBarry Smith 2380273d9f13SBarry Smith Options Database Keys: 2381273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 2382273d9f13SBarry Smith block calculations (much slower) 2383273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 2384273d9f13SBarry Smith 2385273d9f13SBarry Smith Notes: 2386273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2387273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 2388273d9f13SBarry Smith 2389273d9f13SBarry Smith Storage Information: 2390273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2391273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2392273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2393273d9f13SBarry Smith local matrix (a rectangular submatrix). 2394273d9f13SBarry Smith 2395273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2396273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 2397273d9f13SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2398273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2399273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2400273d9f13SBarry Smith 2401273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2402273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2403273d9f13SBarry Smith 2404273d9f13SBarry Smith .vb 2405273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2406273d9f13SBarry Smith ------------------- 2407273d9f13SBarry Smith row 3 | o o o d d d o o o o o o 2408273d9f13SBarry Smith row 4 | o o o d d d o o o o o o 2409273d9f13SBarry Smith row 5 | o o o d d d o o o o o o 2410273d9f13SBarry Smith ------------------- 2411273d9f13SBarry Smith .ve 2412273d9f13SBarry Smith 2413273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2414273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2415273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 2416273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 2417273d9f13SBarry Smith 2418273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2419273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2420273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2421273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2422273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2423273d9f13SBarry Smith matrices. 2424273d9f13SBarry Smith 2425273d9f13SBarry Smith Level: intermediate 2426273d9f13SBarry Smith 2427273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 2428273d9f13SBarry Smith 2429aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2430273d9f13SBarry Smith @*/ 2431b24ad042SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2432273d9f13SBarry Smith { 2433b24ad042SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2434273d9f13SBarry Smith 2435273d9f13SBarry Smith PetscFunctionBegin; 2436a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2437a23d5eceSKris Buschelman if (f) { 2438a23d5eceSKris Buschelman ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2439273d9f13SBarry Smith } 2440273d9f13SBarry Smith PetscFunctionReturn(0); 2441273d9f13SBarry Smith } 2442273d9f13SBarry Smith 24434a2ae208SSatish Balay #undef __FUNCT__ 24444a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ" 244579bdfe76SSatish Balay /*@C 244679bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 244779bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 244879bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 244979bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 245079bdfe76SSatish Balay performance can be increased by more than a factor of 50. 245179bdfe76SSatish Balay 2452db81eaa0SLois Curfman McInnes Collective on MPI_Comm 2453db81eaa0SLois Curfman McInnes 245479bdfe76SSatish Balay Input Parameters: 2455db81eaa0SLois Curfman McInnes + comm - MPI communicator 245679bdfe76SSatish Balay . bs - size of blockk 245779bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 245892e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 245992e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 246092e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 246192e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 246292e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 2463be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2464be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 246547a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 246679bdfe76SSatish Balay submatrix (same for all local rows) 246747a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 246892e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 2469db81eaa0SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 247047a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 247179bdfe76SSatish Balay submatrix (same for all local rows). 247247a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 247392e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 247492e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 247579bdfe76SSatish Balay 247679bdfe76SSatish Balay Output Parameter: 247779bdfe76SSatish Balay . A - the matrix 247879bdfe76SSatish Balay 2479db81eaa0SLois Curfman McInnes Options Database Keys: 2480db81eaa0SLois Curfman McInnes . -mat_no_unroll - uses code that does not unroll the loops in the 2481db81eaa0SLois Curfman McInnes block calculations (much slower) 2482db81eaa0SLois Curfman McInnes . -mat_block_size - size of the blocks to use 24833ffaccefSLois Curfman McInnes 2484b259b22eSLois Curfman McInnes Notes: 248549a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 248649a6f317SBarry Smith 248747a75d0bSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 248847a75d0bSBarry Smith 248979bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 249079bdfe76SSatish Balay (possibly both). 249179bdfe76SSatish Balay 2492be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2493be79a94dSBarry Smith than it must be used on all processors that share the object for that argument. 2494be79a94dSBarry Smith 249579bdfe76SSatish Balay Storage Information: 249679bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 249779bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 249879bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 249979bdfe76SSatish Balay local matrix (a rectangular submatrix). 250079bdfe76SSatish Balay 250179bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 250279bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 250379bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 250479bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 250579bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 250679bdfe76SSatish Balay 250779bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 250879bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 250979bdfe76SSatish Balay 2510db81eaa0SLois Curfman McInnes .vb 2511db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 2512db81eaa0SLois Curfman McInnes ------------------- 2513db81eaa0SLois Curfman McInnes row 3 | o o o d d d o o o o o o 2514db81eaa0SLois Curfman McInnes row 4 | o o o d d d o o o o o o 2515db81eaa0SLois Curfman McInnes row 5 | o o o d d d o o o o o o 2516db81eaa0SLois Curfman McInnes ------------------- 2517db81eaa0SLois Curfman McInnes .ve 251879bdfe76SSatish Balay 251979bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 252079bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 252179bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 252257b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 252379bdfe76SSatish Balay 2524d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2525d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 252679bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 252792e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 252892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 25296da5968aSLois Curfman McInnes matrices. 253079bdfe76SSatish Balay 2531027ccd11SLois Curfman McInnes Level: intermediate 2532027ccd11SLois Curfman McInnes 253392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 253479bdfe76SSatish Balay 2535aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 253679bdfe76SSatish Balay @*/ 2537b24ad042SBarry Smith PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 253879bdfe76SSatish Balay { 25396849ba73SBarry Smith PetscErrorCode ierr; 2540b24ad042SBarry Smith PetscMPIInt size; 254179bdfe76SSatish Balay 2542d64ed03dSBarry Smith PetscFunctionBegin; 2543273d9f13SBarry Smith ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2544d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2545273d9f13SBarry Smith if (size > 1) { 2546273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2547273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2548273d9f13SBarry Smith } else { 2549273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2550273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 25513914022bSBarry Smith } 25523a40ed3dSBarry Smith PetscFunctionReturn(0); 255379bdfe76SSatish Balay } 2554026e39d0SSatish Balay 25554a2ae208SSatish Balay #undef __FUNCT__ 25564a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 25576849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 25580ac07820SSatish Balay { 25590ac07820SSatish Balay Mat mat; 25600ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2561dfbe8321SBarry Smith PetscErrorCode ierr; 2562b24ad042SBarry Smith PetscInt len=0; 25630ac07820SSatish Balay 2564d64ed03dSBarry Smith PetscFunctionBegin; 25650ac07820SSatish Balay *newmat = 0; 2566273d9f13SBarry Smith ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2567be5d1d56SKris Buschelman ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 25681d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 25697fff6886SHong Zhang 25704beb1cfeSHong Zhang mat->factor = matin->factor; 2571273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 25720ac07820SSatish Balay mat->assembled = PETSC_TRUE; 25737fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 25747fff6886SHong Zhang 2575273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 2576521d7252SBarry Smith mat->bs = matin->bs; 25770ac07820SSatish Balay a->bs2 = oldmat->bs2; 25780ac07820SSatish Balay a->mbs = oldmat->mbs; 25790ac07820SSatish Balay a->nbs = oldmat->nbs; 25800ac07820SSatish Balay a->Mbs = oldmat->Mbs; 25810ac07820SSatish Balay a->Nbs = oldmat->Nbs; 25820ac07820SSatish Balay 25830ac07820SSatish Balay a->rstart = oldmat->rstart; 25840ac07820SSatish Balay a->rend = oldmat->rend; 25850ac07820SSatish Balay a->cstart = oldmat->cstart; 25860ac07820SSatish Balay a->cend = oldmat->cend; 25870ac07820SSatish Balay a->size = oldmat->size; 25880ac07820SSatish Balay a->rank = oldmat->rank; 2589aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 2590aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 2591aef5e8e0SSatish Balay a->rowindices = 0; 25920ac07820SSatish Balay a->rowvalues = 0; 25930ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 259430793edcSSatish Balay a->barray = 0; 25953123a43fSSatish Balay a->rstart_bs = oldmat->rstart_bs; 25963123a43fSSatish Balay a->rend_bs = oldmat->rend_bs; 25973123a43fSSatish Balay a->cstart_bs = oldmat->cstart_bs; 25983123a43fSSatish Balay a->cend_bs = oldmat->cend_bs; 25990ac07820SSatish Balay 2600133cdb44SSatish Balay /* hash table stuff */ 2601133cdb44SSatish Balay a->ht = 0; 2602133cdb44SSatish Balay a->hd = 0; 2603133cdb44SSatish Balay a->ht_size = 0; 2604133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 260525fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 2606133cdb44SSatish Balay a->ht_total_ct = 0; 2607133cdb44SSatish Balay a->ht_insert_ct = 0; 2608133cdb44SSatish Balay 2609b24ad042SBarry Smith ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 26108798bf22SSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2611521d7252SBarry Smith ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 26120ac07820SSatish Balay if (oldmat->colmap) { 2613aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 26140f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 261548e59246SSatish Balay #else 2616b24ad042SBarry Smith ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2617b24ad042SBarry Smith PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt)); 2618b24ad042SBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 261948e59246SSatish Balay #endif 26200ac07820SSatish Balay } else a->colmap = 0; 26214beb1cfeSHong Zhang 26220ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2623b24ad042SBarry Smith ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2624b24ad042SBarry Smith PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 2625b24ad042SBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 26260ac07820SSatish Balay } else a->garray = 0; 26270ac07820SSatish Balay 26280ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2629b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->lvec); 26300ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2631b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->Mvctx); 26327fff6886SHong Zhang 26332e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2634b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->A); 26352e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2636b0a32e0cSBarry Smith PetscLogObjectParent(mat,a->B); 2637b0a32e0cSBarry Smith ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 26380ac07820SSatish Balay *newmat = mat; 26394beb1cfeSHong Zhang 26403a40ed3dSBarry Smith PetscFunctionReturn(0); 26410ac07820SSatish Balay } 264257b952d6SSatish Balay 2643e090d566SSatish Balay #include "petscsys.h" 264457b952d6SSatish Balay 26454a2ae208SSatish Balay #undef __FUNCT__ 26464a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ" 2647dfbe8321SBarry Smith PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 264857b952d6SSatish Balay { 264957b952d6SSatish Balay Mat A; 26506849ba73SBarry Smith PetscErrorCode ierr; 2651b24ad042SBarry Smith int fd; 2652b24ad042SBarry Smith PetscInt i,nz,j,rstart,rend; 265387828ca2SBarry Smith PetscScalar *vals,*buf; 265457b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 265557b952d6SSatish Balay MPI_Status status; 2656b24ad042SBarry Smith PetscMPIInt rank,size,maxnz; 2657b24ad042SBarry Smith PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2658dc231df0SBarry Smith PetscInt *locrowlens,*procsnz = 0,*browners; 2659dc231df0SBarry Smith PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows; 2660dc231df0SBarry Smith PetscMPIInt tag = ((PetscObject)viewer)->tag; 2661b24ad042SBarry Smith PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2662dc231df0SBarry Smith PetscInt dcount,kmax,k,nzcount,tmp,mend; 266357b952d6SSatish Balay 2664d64ed03dSBarry Smith PetscFunctionBegin; 2665b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 266657b952d6SSatish Balay 2667d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2668d132466eSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 266957b952d6SSatish Balay if (!rank) { 2670b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2671e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2672552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 26736c5fab8fSBarry Smith } 2674d64ed03dSBarry Smith 2675b24ad042SBarry Smith ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 267657b952d6SSatish Balay M = header[1]; N = header[2]; 267757b952d6SSatish Balay 267829bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 267957b952d6SSatish Balay 268057b952d6SSatish Balay /* 268157b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 268257b952d6SSatish Balay divisible by the blocksize 268357b952d6SSatish Balay */ 268457b952d6SSatish Balay Mbs = M/bs; 2685dc231df0SBarry Smith extra_rows = bs - M + bs*Mbs; 268657b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 268757b952d6SSatish Balay else Mbs++; 268857b952d6SSatish Balay if (extra_rows && !rank) { 2689b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 269057b952d6SSatish Balay } 2691537820f0SBarry Smith 269257b952d6SSatish Balay /* determine ownership of all rows */ 269357b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 269457b952d6SSatish Balay m = mbs*bs; 2695dc231df0SBarry Smith ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2696b24ad042SBarry Smith ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 269757b952d6SSatish Balay rowners[0] = 0; 2698cee3aa6bSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2699cee3aa6bSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 270057b952d6SSatish Balay rstart = rowners[rank]; 270157b952d6SSatish Balay rend = rowners[rank+1]; 270257b952d6SSatish Balay 270357b952d6SSatish Balay /* distribute row lengths to all processors */ 2704dc231df0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 270557b952d6SSatish Balay if (!rank) { 2706dc231df0SBarry Smith mend = m; 2707dc231df0SBarry Smith if (size == 1) mend = mend - extra_rows; 2708dc231df0SBarry Smith ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2709dc231df0SBarry Smith for (j=mend; j<m; j++) locrowlens[j] = 1; 2710dc231df0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2711b24ad042SBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2712b24ad042SBarry Smith ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2713dc231df0SBarry Smith for (j=0; j<m; j++) { 2714dc231df0SBarry Smith procsnz[0] += locrowlens[j]; 2715dc231df0SBarry Smith } 2716dc231df0SBarry Smith for (i=1; i<size; i++) { 2717dc231df0SBarry Smith mend = browners[i+1] - browners[i]; 2718dc231df0SBarry Smith if (i == size-1) mend = mend - extra_rows; 2719dc231df0SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2720dc231df0SBarry Smith for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2721dc231df0SBarry Smith /* calculate the number of nonzeros on each processor */ 2722dc231df0SBarry Smith for (j=0; j<browners[i+1]-browners[i]; j++) { 272357b952d6SSatish Balay procsnz[i] += rowlengths[j]; 272457b952d6SSatish Balay } 2725dc231df0SBarry Smith ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 272657b952d6SSatish Balay } 2727606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2728dc231df0SBarry Smith } else { 2729dc231df0SBarry Smith ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2730dc231df0SBarry Smith } 273157b952d6SSatish Balay 2732dc231df0SBarry Smith if (!rank) { 273357b952d6SSatish Balay /* determine max buffer needed and allocate it */ 27348a8e0b3aSBarry Smith maxnz = procsnz[0]; 2735cdc0ba36SBarry Smith for (i=1; i<size; i++) { 273657b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 273757b952d6SSatish Balay } 2738b24ad042SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 273957b952d6SSatish Balay 274057b952d6SSatish Balay /* read in my part of the matrix column indices */ 274157b952d6SSatish Balay nz = procsnz[0]; 2742b24ad042SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 274357b952d6SSatish Balay mycols = ibuf; 2744cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2745e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2746cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2747cee3aa6bSSatish Balay 274857b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 274957b952d6SSatish Balay for (i=1; i<size-1; i++) { 275057b952d6SSatish Balay nz = procsnz[i]; 2751e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2752b24ad042SBarry Smith ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 275357b952d6SSatish Balay } 275457b952d6SSatish Balay /* read in the stuff for the last proc */ 275557b952d6SSatish Balay if (size != 1) { 275657b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2757e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 275857b952d6SSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2759b24ad042SBarry Smith ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 276057b952d6SSatish Balay } 2761606d414cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2762d64ed03dSBarry Smith } else { 276357b952d6SSatish Balay /* determine buffer space needed for message */ 276457b952d6SSatish Balay nz = 0; 276557b952d6SSatish Balay for (i=0; i<m; i++) { 276657b952d6SSatish Balay nz += locrowlens[i]; 276757b952d6SSatish Balay } 2768b24ad042SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 276957b952d6SSatish Balay mycols = ibuf; 277057b952d6SSatish Balay /* receive message of column indices*/ 2771b24ad042SBarry Smith ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2772b24ad042SBarry Smith ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 277329bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 277457b952d6SSatish Balay } 277557b952d6SSatish Balay 277657b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2777dc231df0SBarry Smith ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2778dc231df0SBarry Smith ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2779dc231df0SBarry Smith ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2780dc231df0SBarry Smith ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2781dc231df0SBarry Smith ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 278257b952d6SSatish Balay rowcount = 0; nzcount = 0; 278357b952d6SSatish Balay for (i=0; i<mbs; i++) { 278457b952d6SSatish Balay dcount = 0; 278557b952d6SSatish Balay odcount = 0; 278657b952d6SSatish Balay for (j=0; j<bs; j++) { 278757b952d6SSatish Balay kmax = locrowlens[rowcount]; 278857b952d6SSatish Balay for (k=0; k<kmax; k++) { 278957b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 279057b952d6SSatish Balay if (!mask[tmp]) { 279157b952d6SSatish Balay mask[tmp] = 1; 279257b952d6SSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 279357b952d6SSatish Balay else masked1[dcount++] = tmp; 279457b952d6SSatish Balay } 279557b952d6SSatish Balay } 279657b952d6SSatish Balay rowcount++; 279757b952d6SSatish Balay } 2798cee3aa6bSSatish Balay 279957b952d6SSatish Balay dlens[i] = dcount; 280057b952d6SSatish Balay odlens[i] = odcount; 2801cee3aa6bSSatish Balay 280257b952d6SSatish Balay /* zero out the mask elements we set */ 280357b952d6SSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 280457b952d6SSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 280557b952d6SSatish Balay } 2806cee3aa6bSSatish Balay 280757b952d6SSatish Balay /* create our matrix */ 280878ae41b4SKris Buschelman ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 280978ae41b4SKris Buschelman ierr = MatSetType(A,type);CHKERRQ(ierr) 281078ae41b4SKris Buschelman ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 281178ae41b4SKris Buschelman 281278ae41b4SKris Buschelman /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2813dc231df0SBarry Smith ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 281457b952d6SSatish Balay 281557b952d6SSatish Balay if (!rank) { 281687828ca2SBarry Smith ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 281757b952d6SSatish Balay /* read in my part of the matrix numerical values */ 281857b952d6SSatish Balay nz = procsnz[0]; 281957b952d6SSatish Balay vals = buf; 2820cee3aa6bSSatish Balay mycols = ibuf; 2821cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 2822e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2823cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2824537820f0SBarry Smith 282557b952d6SSatish Balay /* insert into matrix */ 282657b952d6SSatish Balay jj = rstart*bs; 282757b952d6SSatish Balay for (i=0; i<m; i++) { 2828dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 282957b952d6SSatish Balay mycols += locrowlens[i]; 283057b952d6SSatish Balay vals += locrowlens[i]; 283157b952d6SSatish Balay jj++; 283257b952d6SSatish Balay } 283357b952d6SSatish Balay /* read in other processors (except the last one) and ship out */ 283457b952d6SSatish Balay for (i=1; i<size-1; i++) { 283557b952d6SSatish Balay nz = procsnz[i]; 283657b952d6SSatish Balay vals = buf; 2837e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2838ca161407SBarry Smith ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 283957b952d6SSatish Balay } 284057b952d6SSatish Balay /* the last proc */ 284157b952d6SSatish Balay if (size != 1){ 284257b952d6SSatish Balay nz = procsnz[i] - extra_rows; 2843cee3aa6bSSatish Balay vals = buf; 2844e5638eb3SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 284557b952d6SSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2846ca161407SBarry Smith ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 284757b952d6SSatish Balay } 2848606d414cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2849d64ed03dSBarry Smith } else { 285057b952d6SSatish Balay /* receive numeric values */ 285187828ca2SBarry Smith ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 285257b952d6SSatish Balay 285357b952d6SSatish Balay /* receive message of values*/ 285457b952d6SSatish Balay vals = buf; 2855cee3aa6bSSatish Balay mycols = ibuf; 2856ca161407SBarry Smith ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2857ca161407SBarry Smith ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 285829bbc08cSBarry Smith if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 285957b952d6SSatish Balay 286057b952d6SSatish Balay /* insert into matrix */ 286157b952d6SSatish Balay jj = rstart*bs; 2862cee3aa6bSSatish Balay for (i=0; i<m; i++) { 2863dc231df0SBarry Smith ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 286457b952d6SSatish Balay mycols += locrowlens[i]; 286557b952d6SSatish Balay vals += locrowlens[i]; 286657b952d6SSatish Balay jj++; 286757b952d6SSatish Balay } 286857b952d6SSatish Balay } 2869606d414cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2870606d414cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2871606d414cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2872dc231df0SBarry Smith ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2873dc231df0SBarry Smith ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2874dc231df0SBarry Smith ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 28756d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28766d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287778ae41b4SKris Buschelman 287878ae41b4SKris Buschelman *newmat = A; 28793a40ed3dSBarry Smith PetscFunctionReturn(0); 288057b952d6SSatish Balay } 288157b952d6SSatish Balay 28824a2ae208SSatish Balay #undef __FUNCT__ 28834a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2884133cdb44SSatish Balay /*@ 2885133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2886133cdb44SSatish Balay 2887133cdb44SSatish Balay Input Parameters: 2888133cdb44SSatish Balay . mat - the matrix 2889133cdb44SSatish Balay . fact - factor 2890133cdb44SSatish Balay 2891fee21e36SBarry Smith Collective on Mat 2892fee21e36SBarry Smith 28938c890885SBarry Smith Level: advanced 28948c890885SBarry Smith 2895133cdb44SSatish Balay Notes: 2896133cdb44SSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2897133cdb44SSatish Balay 2898133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 2899133cdb44SSatish Balay 2900133cdb44SSatish Balay .seealso: MatSetOption() 2901133cdb44SSatish Balay @*/ 2902dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2903133cdb44SSatish Balay { 2904dfbe8321SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscReal); 29055bf65638SKris Buschelman 29065bf65638SKris Buschelman PetscFunctionBegin; 29075bf65638SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 29085bf65638SKris Buschelman if (f) { 29095bf65638SKris Buschelman ierr = (*f)(mat,fact);CHKERRQ(ierr); 29105bf65638SKris Buschelman } 29115bf65638SKris Buschelman PetscFunctionReturn(0); 29125bf65638SKris Buschelman } 29135bf65638SKris Buschelman 29145bf65638SKris Buschelman #undef __FUNCT__ 29155bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2916dfbe8321SBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 29175bf65638SKris Buschelman { 291825fdafccSSatish Balay Mat_MPIBAIJ *baij; 2919133cdb44SSatish Balay 2920133cdb44SSatish Balay PetscFunctionBegin; 2921133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2922133cdb44SSatish Balay baij->ht_fact = fact; 2923133cdb44SSatish Balay PetscFunctionReturn(0); 2924133cdb44SSatish Balay } 2925f2a5309cSSatish Balay 29264a2ae208SSatish Balay #undef __FUNCT__ 29274a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2928b24ad042SBarry Smith PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2929f2a5309cSSatish Balay { 2930f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2931f2a5309cSSatish Balay PetscFunctionBegin; 2932f2a5309cSSatish Balay *Ad = a->A; 2933f2a5309cSSatish Balay *Ao = a->B; 2934195d93cdSBarry Smith *colmap = a->garray; 2935f2a5309cSSatish Balay PetscFunctionReturn(0); 2936f2a5309cSSatish Balay } 2937